KVM: selftests: Use canonical $(ARCH) paths for KVM selftests directories
author Sean Christopherson <seanjc@google.com>
Thu, 28 Nov 2024 00:55:46 +0000 (16:55 -0800)
committer Sean Christopherson <seanjc@google.com>
Wed, 18 Dec 2024 22:15:04 +0000 (14:15 -0800)
Use the kernel's canonical $(ARCH) paths instead of the raw target triple
for KVM selftests directories.  KVM selftests are quite nearly the only
place in the entire kernel that uses the target triple for directories,
tools/testing/selftests/drivers/s390x being the lone holdout.

Using the kernel's preferred nomenclature eliminates the minor, but
annoying, friction of having to translate between the kernel's arch names
and KVM's selftests directories, e.g. for pattern matching, opening files,
running selftests, etc.

Opportunistically delete file comments that reference the full path of the
file, as they are obviously prone to becoming stale and serve no known
purpose.

Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20241128005547.4077116-16-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
299 files changed:
MAINTAINERS
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/Makefile.kvm
tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c [deleted file]
tools/testing/selftests/kvm/aarch64/arch_timer.c [deleted file]
tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c [deleted file]
tools/testing/selftests/kvm/aarch64/debug-exceptions.c [deleted file]
tools/testing/selftests/kvm/aarch64/get-reg-list.c [deleted file]
tools/testing/selftests/kvm/aarch64/hypercalls.c [deleted file]
tools/testing/selftests/kvm/aarch64/mmio_abort.c [deleted file]
tools/testing/selftests/kvm/aarch64/no-vgic-v3.c [deleted file]
tools/testing/selftests/kvm/aarch64/page_fault_test.c [deleted file]
tools/testing/selftests/kvm/aarch64/psci_test.c [deleted file]
tools/testing/selftests/kvm/aarch64/set_id_regs.c [deleted file]
tools/testing/selftests/kvm/aarch64/smccc_filter.c [deleted file]
tools/testing/selftests/kvm/aarch64/vcpu_width_config.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_init.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_irq.c [deleted file]
tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c [deleted file]
tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c [deleted file]
tools/testing/selftests/kvm/arm64/aarch32_id_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/arch_timer.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/debug-exceptions.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/hypercalls.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/mmio_abort.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/no-vgic-v3.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/page_fault_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/psci_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/set_id_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/smccc_filter.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vcpu_width_config.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_init.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_irq.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c [new file with mode: 0644]
tools/testing/selftests/kvm/arm64/vpmu_counter_access.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/aarch64/arch_timer.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/delay.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic_v3.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/processor.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/spinlock.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/ucall.h [deleted file]
tools/testing/selftests/kvm/include/aarch64/vgic.h [deleted file]
tools/testing/selftests/kvm/include/arm64/arch_timer.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/delay.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic_v3.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/gic_v3_its.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/spinlock.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/arm64/vgic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/debug_print.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/diag318_test_handler.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/facility.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/sie.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390x/debug_print.h [deleted file]
tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h [deleted file]
tools/testing/selftests/kvm/include/s390x/facility.h [deleted file]
tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/s390x/processor.h [deleted file]
tools/testing/selftests/kvm/include/s390x/sie.h [deleted file]
tools/testing/selftests/kvm/include/s390x/ucall.h [deleted file]
tools/testing/selftests/kvm/include/x86/apic.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/evmcs.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/hyperv.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/mce.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/pmu.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/processor.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/sev.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/svm.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/svm_util.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86/vmx.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/apic.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/evmcs.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/hyperv.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/mce.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/pmu.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/processor.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/sev.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/svm.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/svm_util.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/ucall.h [deleted file]
tools/testing/selftests/kvm/include/x86_64/vmx.h [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_private.h [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_v3.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/handlers.S [deleted file]
tools/testing/selftests/kvm/lib/aarch64/processor.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/spinlock.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/aarch64/vgic.c [deleted file]
tools/testing/selftests/kvm/lib/arm64/gic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_private.h [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_v3.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/spinlock.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/arm64/vgic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/facility.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/facility.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/processor.c [deleted file]
tools/testing/selftests/kvm/lib/s390x/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/x86/apic.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/hyperv.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/memstress.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/pmu.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/processor.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/sev.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/svm.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/ucall.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86/vmx.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86_64/apic.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/handlers.S [deleted file]
tools/testing/selftests/kvm/lib/x86_64/hyperv.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/memstress.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/pmu.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/processor.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/sev.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/svm.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/ucall.c [deleted file]
tools/testing/selftests/kvm/lib/x86_64/vmx.c [deleted file]
tools/testing/selftests/kvm/s390/cmma_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/config [new file with mode: 0644]
tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/debug_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/memop.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/resets.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/shared_zeropage_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/sync_regs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/tprot.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390/ucontrol_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/cmma_test.c [deleted file]
tools/testing/selftests/kvm/s390x/config [deleted file]
tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c [deleted file]
tools/testing/selftests/kvm/s390x/debug_test.c [deleted file]
tools/testing/selftests/kvm/s390x/memop.c [deleted file]
tools/testing/selftests/kvm/s390x/resets.c [deleted file]
tools/testing/selftests/kvm/s390x/shared_zeropage_test.c [deleted file]
tools/testing/selftests/kvm/s390x/sync_regs_test.c [deleted file]
tools/testing/selftests/kvm/s390x/tprot.c [deleted file]
tools/testing/selftests/kvm/s390x/ucontrol_test.c [deleted file]
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/x86/amx_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/apic_bus_clock_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/cpuid_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/debug_regs.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/feature_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/fix_hypercall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/flds_emulation.h [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hwcr_msr_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_clock.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_cpuid.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_evmcs.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_features.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_ipi.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_svm_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/kvm_clock_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/kvm_pv_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/monitor_mwait_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nested_exceptions_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nx_huge_pages_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh [new file with mode: 0755]
tools/testing/selftests/kvm/x86/platform_info_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/pmu_counters_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/pmu_event_filter_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/private_mem_conversions_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/recalc_apic_map_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/set_boot_cpu_id.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/set_sregs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_init2_tests.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_migrate_tests.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sev_smoke_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/smm_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_int_ctl_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/svm_vmcall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/sync_regs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/triple_fault_event_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/tsc_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/tsc_scaling_sync.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/ucna_injection_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/userspace_io_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_apic_access_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_msrs_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xapic_ipi_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xapic_state_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xen_shinfo_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xen_vmcall_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86/xss_msr_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/amx_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/cpuid_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/debug_regs.c [deleted file]
tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/feature_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/flds_emulation.h [deleted file]
tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_clock.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_features.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_ipi.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c [deleted file]
tools/testing/selftests/kvm/x86_64/kvm_clock_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh [deleted file]
tools/testing/selftests/kvm/x86_64/platform_info_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c [deleted file]
tools/testing/selftests/kvm/x86_64/set_sregs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_init2_tests.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c [deleted file]
tools/testing/selftests/kvm/x86_64/sev_smoke_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/smm_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/sync_regs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c [deleted file]
tools/testing/selftests/kvm/x86_64/ucna_injection_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_io_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xapic_state_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/xss_msr_test.c [deleted file]

index 17daa9ee9384509c1ef3f2a3825a4594eab88741..2dba81d2fa6f210e78f3ccfc564049733c20e393 100644 (file)
@@ -12605,8 +12605,8 @@ F:      arch/arm64/include/asm/kvm*
 F:     arch/arm64/include/uapi/asm/kvm*
 F:     arch/arm64/kvm/
 F:     include/kvm/arm_*
-F:     tools/testing/selftests/kvm/*/aarch64/
-F:     tools/testing/selftests/kvm/aarch64/
+F:     tools/testing/selftests/kvm/*/arm64/
+F:     tools/testing/selftests/kvm/arm64/
 
 KERNEL VIRTUAL MACHINE FOR LOONGARCH (KVM/LoongArch)
 M:     Tianrui Zhao <zhaotianrui@loongson.cn>
@@ -12677,8 +12677,8 @@ F:      arch/s390/kvm/
 F:     arch/s390/mm/gmap.c
 F:     drivers/s390/char/uvdevice.c
 F:     tools/testing/selftests/drivers/s390x/uvdevice/
-F:     tools/testing/selftests/kvm/*/s390x/
-F:     tools/testing/selftests/kvm/s390x/
+F:     tools/testing/selftests/kvm/*/s390/
+F:     tools/testing/selftests/kvm/s390/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:     Sean Christopherson <seanjc@google.com>
@@ -12695,8 +12695,8 @@ F:      arch/x86/include/uapi/asm/svm.h
 F:     arch/x86/include/uapi/asm/vmx.h
 F:     arch/x86/kvm/
 F:     arch/x86/kvm/*/
-F:     tools/testing/selftests/kvm/*/x86_64/
-F:     tools/testing/selftests/kvm/x86_64/
+F:     tools/testing/selftests/kvm/*/x86/
+F:     tools/testing/selftests/kvm/x86/
 
 KERNFS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
index 7b33464bf8cc5c88287be667b3f766906a643283..9bc2eba1af1c4ce744843b6bba4e18f0c77756a9 100644 (file)
@@ -4,16 +4,12 @@ include $(top_srcdir)/scripts/subarch.include
 ARCH            ?= $(SUBARCH)
 
 ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
-ifeq ($(ARCH),x86)
-       ARCH_DIR := x86_64
-else ifeq ($(ARCH),arm64)
-       ARCH_DIR := aarch64
-else ifeq ($(ARCH),s390)
-       ARCH_DIR := s390x
+# Top-level selftests allows ARCH=x86_64 :-(
+ifeq ($(ARCH),x86_64)
+       ARCH_DIR := x86
 else
        ARCH_DIR := $(ARCH)
 endif
-
 include Makefile.kvm
 else
 # Empty targets for unsupported architectures
index e988a72f8c20cd5cbda3646341ac6e6a1e1a2f00..9888dd6bb483ceb4189d81c740e334125c442020 100644 (file)
@@ -18,177 +18,177 @@ LIBKVM += lib/userfaultfd_util.c
 
 LIBKVM_STRING += lib/string_override.c
 
-LIBKVM_x86_64 += lib/x86_64/apic.c
-LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/hyperv.c
-LIBKVM_x86_64 += lib/x86_64/memstress.c
-LIBKVM_x86_64 += lib/x86_64/pmu.c
-LIBKVM_x86_64 += lib/x86_64/processor.c
-LIBKVM_x86_64 += lib/x86_64/sev.c
-LIBKVM_x86_64 += lib/x86_64/svm.c
-LIBKVM_x86_64 += lib/x86_64/ucall.c
-LIBKVM_x86_64 += lib/x86_64/vmx.c
-
-LIBKVM_aarch64 += lib/aarch64/gic.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
-LIBKVM_aarch64 += lib/aarch64/handlers.S
-LIBKVM_aarch64 += lib/aarch64/processor.c
-LIBKVM_aarch64 += lib/aarch64/spinlock.c
-LIBKVM_aarch64 += lib/aarch64/ucall.c
-LIBKVM_aarch64 += lib/aarch64/vgic.c
-
-LIBKVM_s390x += lib/s390x/diag318_test_handler.c
-LIBKVM_s390x += lib/s390x/processor.c
-LIBKVM_s390x += lib/s390x/ucall.c
-LIBKVM_s390x += lib/s390x/facility.c
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
 
 LIBKVM_riscv += lib/riscv/handlers.S
 LIBKVM_riscv += lib/riscv/processor.c
 LIBKVM_riscv += lib/riscv/ucall.c
 
 # Non-compiled test targets
-TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
 
 # Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
-TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
-TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
-TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
-TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
-TEST_GEN_PROGS_x86_64 += x86_64/amx_test
-TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
-TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
-TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
-TEST_GEN_PROGS_x86_64 += coalesced_io_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += guest_memfd_test
-TEST_GEN_PROGS_x86_64 += guest_print_test
-TEST_GEN_PROGS_x86_64 += hardware_disable_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += kvm_page_table_test
-TEST_GEN_PROGS_x86_64 += mmu_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_perf_test
-TEST_GEN_PROGS_x86_64 += rseq_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
-TEST_GEN_PROGS_x86_64 += system_counter_offset_test
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
+TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += demand_paging_test
+TEST_GEN_PROGS_x86 += dirty_log_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += guest_print_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86 += kvm_page_table_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += set_memory_region_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
 
 # Compiled outputs used by test targets
-TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-
-TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
-TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
-TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort
-TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
-TEST_GEN_PROGS_aarch64 += aarch64/psci_test
-TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
-TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
-TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
-TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
-TEST_GEN_PROGS_aarch64 += arch_timer
-TEST_GEN_PROGS_aarch64 += coalesced_io_test
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += guest_print_test
-TEST_GEN_PROGS_aarch64 += get-reg-list
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += kvm_page_table_test
-TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
-TEST_GEN_PROGS_aarch64 += memslot_perf_test
-TEST_GEN_PROGS_aarch64 += mmu_stress_test
-TEST_GEN_PROGS_aarch64 += rseq_test
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += s390x/tprot
-TEST_GEN_PROGS_s390x += s390x/cmma_test
-TEST_GEN_PROGS_s390x += s390x/debug_test
-TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test
-TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += guest_print_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += kvm_page_table_test
-TEST_GEN_PROGS_s390x += rseq_test
-TEST_GEN_PROGS_s390x += set_memory_region_test
-TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += demand_paging_test
+TEST_GEN_PROGS_arm64 += dirty_log_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_print_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += set_memory_region_test
+TEST_GEN_PROGS_arm64 += steal_time
+TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += demand_paging_test
+TEST_GEN_PROGS_s390 += dirty_log_test
+TEST_GEN_PROGS_s390 += guest_print_test
+TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += set_memory_region_test
+TEST_GEN_PROGS_s390 += kvm_binary_stats_test
 
 TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
 TEST_GEN_PROGS_riscv += riscv/ebreak_test
@@ -222,11 +222,7 @@ include ../lib.mk
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
-else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
-endif
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH_DIR)/include
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
        -fno-builtin-memcmp -fno-builtin-memcpy \
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
deleted file mode 100644 (file)
index 447d61c..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
- *
- * Copyright (c) 2022 Google LLC.
- *
- * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
- * and WI from userspace.
- */
-
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-#define BAD_ID_REG_VAL 0x1badc0deul
-
-#define GUEST_ASSERT_REG_RAZ(reg)      GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
-
-static void guest_main(void)
-{
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
-       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
-       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
-       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
-
-       GUEST_DONE();
-}
-
-static void test_guest_raz(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
-
-static uint64_t raz_wi_reg_ids[] = {
-       KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
-       KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
-       KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
-};
-
-static void test_user_raz_wi(struct kvm_vcpu *vcpu)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
-               uint64_t reg_id = raz_wi_reg_ids[i];
-               uint64_t val;
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-
-               /*
-                * Expect the ioctl to succeed with no effect on the register
-                * value.
-                */
-               vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-       }
-}
-
-static uint64_t raz_invariant_reg_ids[] = {
-       KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
-       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
-       KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
-       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
-};
-
-static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
-{
-       int i, r;
-
-       for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
-               uint64_t reg_id = raz_invariant_reg_ids[i];
-               uint64_t val;
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-
-               r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
-               TEST_ASSERT(r < 0 && errno == EINVAL,
-                           "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
-               val = vcpu_get_reg(vcpu, reg_id);
-               TEST_ASSERT_EQ(val, 0);
-       }
-}
-
-
-
-static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
-{
-       uint64_t val, el0;
-
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
-       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-       return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       TEST_REQUIRE(vcpu_aarch64_only(vcpu));
-
-       test_user_raz_wi(vcpu);
-       test_user_raz_invariant(vcpu);
-       test_guest_raz(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
deleted file mode 100644 (file)
index eeba1cc..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * The test validates both the virtual and physical timer IRQs using
- * CVAL and TVAL registers.
- *
- * Copyright (c) 2021, Google LLC.
- */
-#include "arch_timer.h"
-#include "delay.h"
-#include "gic.h"
-#include "processor.h"
-#include "timer_test.h"
-#include "ucall_common.h"
-#include "vgic.h"
-
-enum guest_stage {
-       GUEST_STAGE_VTIMER_CVAL = 1,
-       GUEST_STAGE_VTIMER_TVAL,
-       GUEST_STAGE_PTIMER_CVAL,
-       GUEST_STAGE_PTIMER_TVAL,
-       GUEST_STAGE_MAX,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-static void
-guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
-{
-       switch (shared_data->guest_stage) {
-       case GUEST_STAGE_VTIMER_CVAL:
-               timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(VIRTUAL);
-               timer_set_ctl(VIRTUAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_VTIMER_TVAL:
-               timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(VIRTUAL);
-               timer_set_ctl(VIRTUAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_PTIMER_CVAL:
-               timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(PHYSICAL);
-               timer_set_ctl(PHYSICAL, CTL_ENABLE);
-               break;
-       case GUEST_STAGE_PTIMER_TVAL:
-               timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
-               shared_data->xcnt = timer_get_cntct(PHYSICAL);
-               timer_set_ctl(PHYSICAL, CTL_ENABLE);
-               break;
-       default:
-               GUEST_ASSERT(0);
-       }
-}
-
-static void guest_validate_irq(unsigned int intid,
-                               struct test_vcpu_shared_data *shared_data)
-{
-       enum guest_stage stage = shared_data->guest_stage;
-       uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
-       unsigned long xctl = 0;
-       unsigned int timer_irq = 0;
-       unsigned int accessor;
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       switch (stage) {
-       case GUEST_STAGE_VTIMER_CVAL:
-       case GUEST_STAGE_VTIMER_TVAL:
-               accessor = VIRTUAL;
-               timer_irq = vtimer_irq;
-               break;
-       case GUEST_STAGE_PTIMER_CVAL:
-       case GUEST_STAGE_PTIMER_TVAL:
-               accessor = PHYSICAL;
-               timer_irq = ptimer_irq;
-               break;
-       default:
-               GUEST_ASSERT(0);
-               return;
-       }
-
-       xctl = timer_get_ctl(accessor);
-       if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
-               return;
-
-       timer_set_ctl(accessor, CTL_IMASK);
-       xcnt = timer_get_cntct(accessor);
-       cval = timer_get_cval(accessor);
-
-       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
-
-       /* Make sure we are dealing with the correct timer IRQ */
-       GUEST_ASSERT_EQ(intid, timer_irq);
-
-       /* Basic 'timer condition met' check */
-       __GUEST_ASSERT(xcnt >= cval,
-                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
-                      xcnt, cval, xcnt_diff_us);
-       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
-
-       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       unsigned int intid = gic_get_and_ack_irq();
-       uint32_t cpu = guest_get_vcpuid();
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
-       guest_validate_irq(intid, shared_data);
-
-       gic_set_eoi(intid);
-}
-
-static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
-                               enum guest_stage stage)
-{
-       uint32_t irq_iter, config_iter;
-
-       shared_data->guest_stage = stage;
-       shared_data->nr_iter = 0;
-
-       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
-               /* Setup the next interrupt */
-               guest_configure_timer_action(shared_data);
-
-               /* Setup a timeout for the interrupt to arrive */
-               udelay(msecs_to_usecs(test_args.timer_period_ms) +
-                       test_args.timer_err_margin_us);
-
-               irq_iter = READ_ONCE(shared_data->nr_iter);
-               __GUEST_ASSERT(config_iter + 1 == irq_iter,
-                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
-                               "  Guest timer interrupt was not triggered within the specified\n"
-                               "  interval, try to increase the error margin by [-e] option.\n",
-                               config_iter + 1, irq_iter);
-       }
-}
-
-static void guest_code(void)
-{
-       uint32_t cpu = guest_get_vcpuid();
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
-
-       local_irq_disable();
-
-       gic_init(GIC_V3, test_args.nr_vcpus);
-
-       timer_set_ctl(VIRTUAL, CTL_IMASK);
-       timer_set_ctl(PHYSICAL, CTL_IMASK);
-
-       gic_irq_enable(vtimer_irq);
-       gic_irq_enable(ptimer_irq);
-       local_irq_enable();
-
-       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
-       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
-
-       GUEST_DONE();
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm)
-{
-       /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
-       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
-       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
-       sync_global_to_guest(vm, ptimer_irq);
-       sync_global_to_guest(vm, vtimer_irq);
-
-       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static int gic_fd;
-
-struct kvm_vm *test_vm_create(void)
-{
-       struct kvm_vm *vm;
-       unsigned int i;
-       int nr_vcpus = test_args.nr_vcpus;
-
-       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
-
-       vm_init_descriptor_tables(vm);
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
-       if (!test_args.reserved) {
-               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
-                       struct kvm_arm_counter_offset offset = {
-                               .counter_offset = test_args.counter_offset,
-                               .reserved = 0,
-                       };
-                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
-               } else
-                       TEST_FAIL("no support for global offset");
-       }
-
-       for (i = 0; i < nr_vcpus; i++)
-               vcpu_init_descriptor_tables(vcpus[i]);
-
-       test_init_timer_irq(vm);
-       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
-
-       /* Make all the test's cmdline args visible to the guest */
-       sync_global_to_guest(vm, test_args);
-
-       return vm;
-}
-
-void test_vm_cleanup(struct kvm_vm *vm)
-{
-       close(gic_fd);
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
deleted file mode 100644 (file)
index a36a7e2..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
- *
- * The test validates some edge cases related to the arch-timer:
- * - timers above the max TVAL value.
- * - timers in the past
- * - moving counters ahead and behind pending timers.
- * - reprograming timers.
- * - timers fired multiple times.
- * - masking/unmasking using the timer control mask.
- *
- * Copyright (c) 2021, Google LLC.
- */
-
-#define _GNU_SOURCE
-
-#include <pthread.h>
-#include <sys/sysinfo.h>
-
-#include "arch_timer.h"
-#include "gic.h"
-#include "vgic.h"
-
-static const uint64_t CVAL_MAX = ~0ULL;
-/* tval is a signed 32-bit int. */
-static const int32_t TVAL_MAX = INT32_MAX;
-static const int32_t TVAL_MIN = INT32_MIN;
-
-/* After how much time we say there is no IRQ. */
-static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
-
-/* Number of runs. */
-static const uint32_t NR_TEST_ITERS_DEF = 5;
-
-/* Default wait test time in ms. */
-static const uint32_t WAIT_TEST_MS = 10;
-
-/* Default "long" wait test time in ms. */
-static const uint32_t LONG_WAIT_TEST_MS = 100;
-
-/* Shared with IRQ handler. */
-struct test_vcpu_shared_data {
-       atomic_t handled;
-       atomic_t spurious;
-} shared_data;
-
-struct test_args {
-       /* Virtual or physical timer and counter tests. */
-       enum arch_timer timer;
-       /* Delay used for most timer tests. */
-       uint64_t wait_ms;
-       /* Delay used in the test_long_timer_delays test. */
-       uint64_t long_wait_ms;
-       /* Number of iterations. */
-       int iterations;
-       /* Whether to test the physical timer. */
-       bool test_physical;
-       /* Whether to test the virtual timer. */
-       bool test_virtual;
-};
-
-struct test_args test_args = {
-       .wait_ms = WAIT_TEST_MS,
-       .long_wait_ms = LONG_WAIT_TEST_MS,
-       .iterations = NR_TEST_ITERS_DEF,
-       .test_physical = true,
-       .test_virtual = true,
-};
-
-static int vtimer_irq, ptimer_irq;
-
-enum sync_cmd {
-       SET_COUNTER_VALUE,
-       USERSPACE_USLEEP,
-       USERSPACE_SCHED_YIELD,
-       USERSPACE_MIGRATE_SELF,
-       NO_USERSPACE_CMD,
-};
-
-typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
-static void sleep_migrate(enum arch_timer timer, uint64_t usec);
-
-sleep_method_t sleep_method[] = {
-       sleep_poll,
-       sleep_sched_poll,
-       sleep_migrate,
-       sleep_in_userspace,
-};
-
-typedef void (*irq_wait_method_t)(void);
-
-static void wait_for_non_spurious_irq(void);
-static void wait_poll_for_irq(void);
-static void wait_sched_poll_for_irq(void);
-static void wait_migrate_poll_for_irq(void);
-
-irq_wait_method_t irq_wait_method[] = {
-       wait_for_non_spurious_irq,
-       wait_poll_for_irq,
-       wait_sched_poll_for_irq,
-       wait_migrate_poll_for_irq,
-};
-
-enum timer_view {
-       TIMER_CVAL,
-       TIMER_TVAL,
-};
-
-static void assert_irqs_handled(uint32_t n)
-{
-       int h = atomic_read(&shared_data.handled);
-
-       __GUEST_ASSERT(h == n, "Handled %d IRQs but expected %d", h, n);
-}
-
-static void userspace_cmd(uint64_t cmd)
-{
-       GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
-}
-
-static void userspace_migrate_vcpu(void)
-{
-       userspace_cmd(USERSPACE_MIGRATE_SELF);
-}
-
-static void userspace_sleep(uint64_t usecs)
-{
-       GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
-}
-
-static void set_counter(enum arch_timer timer, uint64_t counter)
-{
-       GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
-}
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       unsigned int intid = gic_get_and_ack_irq();
-       enum arch_timer timer;
-       uint64_t cnt, cval;
-       uint32_t ctl;
-       bool timer_condition, istatus;
-
-       if (intid == IAR_SPURIOUS) {
-               atomic_inc(&shared_data.spurious);
-               goto out;
-       }
-
-       if (intid == ptimer_irq)
-               timer = PHYSICAL;
-       else if (intid == vtimer_irq)
-               timer = VIRTUAL;
-       else
-               goto out;
-
-       ctl = timer_get_ctl(timer);
-       cval = timer_get_cval(timer);
-       cnt = timer_get_cntct(timer);
-       timer_condition = cnt >= cval;
-       istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
-       GUEST_ASSERT_EQ(timer_condition, istatus);
-
-       /* Disable and mask the timer. */
-       timer_set_ctl(timer, CTL_IMASK);
-
-       atomic_inc(&shared_data.handled);
-
-out:
-       gic_set_eoi(intid);
-}
-
-static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
-                        uint32_t ctl)
-{
-       atomic_set(&shared_data.handled, 0);
-       atomic_set(&shared_data.spurious, 0);
-       timer_set_cval(timer, cval_cycles);
-       timer_set_ctl(timer, ctl);
-}
-
-static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
-                        uint32_t ctl)
-{
-       atomic_set(&shared_data.handled, 0);
-       atomic_set(&shared_data.spurious, 0);
-       timer_set_ctl(timer, ctl);
-       timer_set_tval(timer, tval_cycles);
-}
-
-static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
-                        enum timer_view tv)
-{
-       switch (tv) {
-       case TIMER_CVAL:
-               set_cval_irq(timer, xval, ctl);
-               break;
-       case TIMER_TVAL:
-               set_tval_irq(timer, xval, ctl);
-               break;
-       default:
-               GUEST_FAIL("Could not get timer %d", timer);
-       }
-}
-
-/*
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void wait_for_non_spurious_irq(void)
-{
-       int h;
-
-       local_irq_disable();
-
-       for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
-               wfi();
-               local_irq_enable();
-               isb(); /* handle IRQ */
-               local_irq_disable();
-       }
-}
-
-/*
- * Wait for a non-spurious IRQ by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- *
- * Note that this can theoretically hang forever, so we rely on having
- * a timeout mechanism in the "runner", like:
- * tools/testing/selftests/kselftest/runner.sh.
- */
-static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
-{
-       int h;
-
-       local_irq_disable();
-
-       h = atomic_read(&shared_data.handled);
-
-       local_irq_enable();
-       while (h == atomic_read(&shared_data.handled)) {
-               if (usp_cmd == NO_USERSPACE_CMD)
-                       cpu_relax();
-               else
-                       userspace_cmd(usp_cmd);
-       }
-       local_irq_disable();
-}
-
-static void wait_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(NO_USERSPACE_CMD);
-}
-
-static void wait_sched_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
-}
-
-static void wait_migrate_poll_for_irq(void)
-{
-       poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
-}
-
-/*
- * Sleep for usec microseconds by polling in the guest or in
- * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
- */
-static void guest_poll(enum arch_timer test_timer, uint64_t usec,
-                      enum sync_cmd usp_cmd)
-{
-       uint64_t cycles = usec_to_cycles(usec);
-       /* Whichever timer we are testing with, sleep with the other. */
-       enum arch_timer sleep_timer = 1 - test_timer;
-       uint64_t start = timer_get_cntct(sleep_timer);
-
-       while ((timer_get_cntct(sleep_timer) - start) < cycles) {
-               if (usp_cmd == NO_USERSPACE_CMD)
-                       cpu_relax();
-               else
-                       userspace_cmd(usp_cmd);
-       }
-}
-
-static void sleep_poll(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, NO_USERSPACE_CMD);
-}
-
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
-}
-
-static void sleep_migrate(enum arch_timer timer, uint64_t usec)
-{
-       guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
-}
-
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
-{
-       userspace_sleep(usec);
-}
-
-/*
- * Reset the timer state to sane values: a counter value well away from the
- * edge, and the timer masked and disabled via the control register.
- */
-static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
-{
-       set_counter(timer, cnt);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timer_xval(enum arch_timer timer, uint64_t xval,
-                           enum timer_view tv, irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       local_irq_disable();
-
-       if (reset_state)
-               reset_timer_state(timer, reset_cnt);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/*
- * The test_timer_* functions will program the timer, wait for it, and assert
- * the firing of the correct IRQ.
- *
- * These functions don't have a timeout and return as soon as they receive an
- * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
- * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
- */
-
-static void test_timer_cval(enum arch_timer timer, uint64_t cval,
-                           irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
-}
-
-static void test_timer_tval(enum arch_timer timer, int32_t tval,
-                           irq_wait_method_t wm, bool reset_state,
-                           uint64_t reset_cnt)
-{
-       test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
-                       reset_cnt);
-}
-
-static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
-                                  uint64_t usec, enum timer_view timer_view,
-                                  sleep_method_t guest_sleep)
-{
-       local_irq_disable();
-
-       set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
-       guest_sleep(timer, usec);
-
-       local_irq_enable();
-       isb();
-
-       /* Assume success (no IRQ) after waiting usec microseconds */
-       assert_irqs_handled(0);
-}
-
-static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
-                            uint64_t usec, sleep_method_t wm)
-{
-       test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
-}
-
-static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
-                            sleep_method_t wm)
-{
-       /* tval will be cast to an int32_t in test_xval_check_no_irq */
-       test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
-}
-
-/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
-static void test_timer_control_mask_then_unmask(enum arch_timer timer)
-{
-       reset_timer_state(timer, DEF_CNT);
-       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
-       /* Unmask the timer, and then get an IRQ. */
-       local_irq_disable();
-       timer_set_ctl(timer, CTL_ENABLE);
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wait_for_non_spurious_irq();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/* Check that timer control masks actually mask a timer being fired. */
-static void test_timer_control_masks(enum arch_timer timer)
-{
-       reset_timer_state(timer, DEF_CNT);
-
-       /* Local IRQs are not masked at this point. */
-
-       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
-
-       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
-       sleep_poll(timer, TIMEOUT_NO_IRQ_US);
-
-       assert_irqs_handled(0);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_fire_a_timer_multiple_times(enum arch_timer timer,
-                                            irq_wait_method_t wm, int num)
-{
-       int i;
-
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       set_tval_irq(timer, 0, CTL_ENABLE);
-
-       for (i = 1; i <= num; i++) {
-               /* This method re-enables IRQs to handle the one we're looking for. */
-               wm();
-
-               /*
-                * The IRQ handler masked and disabled the timer.
-                * Enable and unmask it again.
-                */
-               timer_set_ctl(timer, CTL_ENABLE);
-
-               assert_irqs_handled(i);
-       }
-
-       local_irq_enable();
-}
-
-static void test_timers_fired_multiple_times(enum arch_timer timer)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
-               test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
-}
-
-/*
- * Set a timer for tval=delta_1_ms, then reprogram it to
- * tval=delta_2_ms. Check that the timer fires. There is no
- * timeout for the wait: we use the wfi instruction.
- */
-static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
-                                    int32_t delta_1_ms, int32_t delta_2_ms)
-{
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       /* Program the timer to DEF_CNT + delta_1_ms. */
-       set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
-
-       /* Reprogram the timer to DEF_CNT + delta_2_ms. */
-       timer_set_tval(timer, msec_to_cycles(delta_2_ms));
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
-       GUEST_ASSERT(timer_get_cntct(timer) >=
-                    DEF_CNT + msec_to_cycles(delta_2_ms));
-
-       local_irq_enable();
-       assert_irqs_handled(1);
-}
-
-static void test_reprogram_timers(enum arch_timer timer)
-{
-       int i;
-       uint64_t base_wait = test_args.wait_ms;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               /*
-                * Ensure reprogramming works whether going from a
-                * longer time to a shorter or vice versa.
-                */
-               test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
-                                        base_wait);
-               test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
-                                        2 * base_wait);
-       }
-}
-
-static void test_basic_functionality(enum arch_timer timer)
-{
-       int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
-       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-       }
-}
-
-/*
- * This test checks basic timer behavior without actually firing timers, e.g.
- * the relationship between cval and tval, and tval down-counting.
- */
-static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
-{
-       reset_timer_state(timer, DEF_CNT);
-
-       local_irq_disable();
-
-       /* cval in the past */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) -
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) < 0);
-
-       /* tval in the past */
-       timer_set_tval(timer, -1);
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
-
-       /*
-        * tval larger than TVAL_MAX. This requires programming with
-        * timer_set_cval instead so the value is expressible.
-        */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) + TVAL_MAX +
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) <= 0);
-
-       /*
-        * tval larger than 2 * TVAL_MAX.
-        * A delta of twice TVAL_MAX wraps the 32-bit TVAL completely.
-        */
-       timer_set_cval(timer,
-                      timer_get_cntct(timer) + 2ULL * TVAL_MAX +
-                      msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_tval(timer) <=
-                      msec_to_cycles(test_args.wait_ms));
-
-       /* Negative tval that rolls over from 0. */
-       set_counter(timer, msec_to_cycles(1));
-       timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
-       if (use_sched)
-               userspace_migrate_vcpu();
-       GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
-
-       /* tval should keep down-counting from 0 to -1. */
-       timer_set_tval(timer, 0);
-       sleep_poll(timer, 1);
-       GUEST_ASSERT(timer_get_tval(timer) < 0);
-
-       local_irq_enable();
-
-       /* Mask and disable any pending timer. */
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_timers_sanity_checks(enum arch_timer timer)
-{
-       timers_sanity_checks(timer, false);
-       /* Check how KVM saves/restores these edge-case values. */
-       timers_sanity_checks(timer, true);
-}
-
-static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
-{
-       local_irq_disable();
-       reset_timer_state(timer, DEF_CNT);
-
-       set_cval_irq(timer,
-                    (uint64_t) TVAL_MAX +
-                    msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
-
-       set_counter(timer, TVAL_MAX);
-
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
-static void test_timers_above_tval_max(enum arch_timer timer)
-{
-       uint64_t cval;
-       int i;
-
-       /*
-        * Test that the system is not implementing cval in terms of
-        * tval.  If that was the case, setting a cval to "cval = now
-        * + TVAL_MAX + wait_ms" would wrap to "cval = now +
-        * wait_ms", and the timer would fire immediately. Test that it
-        * doesn't.
-        */
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               reset_timer_state(timer, DEF_CNT);
-               cval = timer_get_cntct(timer) + TVAL_MAX +
-                       msec_to_cycles(test_args.wait_ms);
-               test_cval_no_irq(timer, cval,
-                                msecs_to_usecs(test_args.wait_ms) +
-                                TIMEOUT_NO_IRQ_US, sleep_method[i]);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               /* Get the IRQ by moving the counter forward. */
-               test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
-       }
-}
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests.  It
- * sets the counter to cnt_1, programs the [c|t]val, sets the counter to
- * cnt_2, and then waits for an IRQ.
- */
-static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
-                                   uint64_t xval, uint64_t cnt_2,
-                                   irq_wait_method_t wm, enum timer_view tv)
-{
-       local_irq_disable();
-
-       set_counter(timer, cnt_1);
-       timer_set_ctl(timer, CTL_IMASK);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-       set_counter(timer, cnt_2);
-       /* This method re-enables IRQs to handle the one we're looking for. */
-       wm();
-
-       assert_irqs_handled(1);
-       local_irq_enable();
-}
-
-/*
- * Template function to be used by the test_move_counter_ahead_* tests.  It
- * sets the counter to cnt_1, programs the [c|t]val, sets the counter to
- * cnt_2, and then sleeps while checking that no IRQ fires.
- */
-static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, uint64_t xval,
-                                          uint64_t cnt_2,
-                                          sleep_method_t guest_sleep,
-                                          enum timer_view tv)
-{
-       local_irq_disable();
-
-       set_counter(timer, cnt_1);
-       timer_set_ctl(timer, CTL_IMASK);
-
-       set_xval_irq(timer, xval, CTL_ENABLE, tv);
-       set_counter(timer, cnt_2);
-       guest_sleep(timer, TIMEOUT_NO_IRQ_US);
-
-       local_irq_enable();
-       isb();
-
-       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
-       assert_irqs_handled(0);
-       timer_set_ctl(timer, CTL_IMASK);
-}
-
-static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
-                                   int32_t tval, uint64_t cnt_2,
-                                   irq_wait_method_t wm)
-{
-       test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
-                                   uint64_t cval, uint64_t cnt_2,
-                                   irq_wait_method_t wm)
-{
-       test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
-}
-
-static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, int32_t tval,
-                                          uint64_t cnt_2, sleep_method_t wm)
-{
-       test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
-                                      TIMER_TVAL);
-}
-
-static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
-                                          uint64_t cnt_1, uint64_t cval,
-                                          uint64_t cnt_2, sleep_method_t wm)
-{
-       test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
-                                      TIMER_CVAL);
-}
-
-/* Set a timer and then move the counter ahead of it. */
-static void test_move_counters_ahead_of_timers(enum arch_timer timer)
-{
-       int i;
-       int32_t tval;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
-               test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
-
-               /* Move counter ahead of negative tval. */
-               test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
-               test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
-               tval = TVAL_MAX;
-               test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
-                                       wm);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
-       }
-}
-
-/*
- * Program a timer, mask it, and then change the tval or counter to cancel it.
- * Unmask it and check that nothing fires.
- */
-static void test_move_counters_behind_timers(enum arch_timer timer)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
-                                              sm);
-               test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
-       }
-}
-
-static void test_timers_in_the_past(enum arch_timer timer)
-{
-       int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
-       uint64_t cval;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               /* Set a timer wait_ms in the past. */
-               cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-
-               /* Set a timer to counter=0 (in the past) */
-               test_timer_cval(timer, 0, wm, true, DEF_CNT);
-
-               /* Set a timer for tval=0 (now) */
-               test_timer_tval(timer, 0, wm, true, DEF_CNT);
-
-               /* Set a timer to as far in the past as possible */
-               test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
-       }
-
-       /*
-        * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
-        * IRQ as that tval means cval=CVAL_MAX-wait_ms.
-        */
-       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
-               sleep_method_t sm = sleep_method[i];
-
-               set_counter(timer, msec_to_cycles(test_args.wait_ms));
-               test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
-       }
-}
-
-static void test_long_timer_delays(enum arch_timer timer)
-{
-       int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
-       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
-               irq_wait_method_t wm = irq_wait_method[i];
-
-               test_timer_cval(timer, cval, wm, true, DEF_CNT);
-               test_timer_tval(timer, tval, wm, true, DEF_CNT);
-       }
-}
-
-static void guest_run_iteration(enum arch_timer timer)
-{
-       test_basic_functionality(timer);
-       test_timers_sanity_checks(timer);
-
-       test_timers_above_tval_max(timer);
-       test_timers_in_the_past(timer);
-
-       test_move_counters_ahead_of_timers(timer);
-       test_move_counters_behind_timers(timer);
-       test_reprogram_timers(timer);
-
-       test_timers_fired_multiple_times(timer);
-
-       test_timer_control_mask_then_unmask(timer);
-       test_timer_control_masks(timer);
-}
-
-static void guest_code(enum arch_timer timer)
-{
-       int i;
-
-       local_irq_disable();
-
-       gic_init(GIC_V3, 1);
-
-       timer_set_ctl(VIRTUAL, CTL_IMASK);
-       timer_set_ctl(PHYSICAL, CTL_IMASK);
-
-       gic_irq_enable(vtimer_irq);
-       gic_irq_enable(ptimer_irq);
-       local_irq_enable();
-
-       for (i = 0; i < test_args.iterations; i++) {
-               GUEST_SYNC(i);
-               guest_run_iteration(timer);
-       }
-
-       test_long_timer_delays(timer);
-       GUEST_DONE();
-}
-
-static uint32_t next_pcpu(void)
-{
-       uint32_t max = get_nprocs();
-       uint32_t cur = sched_getcpu();
-       uint32_t next = cur;
-       cpu_set_t cpuset;
-
-       TEST_ASSERT(max > 1, "Need at least two physical cpus");
-
-       sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
-       do {
-               next = (next + 1) % CPU_SETSIZE;
-       } while (!CPU_ISSET(next, &cpuset));
-
-       return next;
-}
-
-static void migrate_self(uint32_t new_pcpu)
-{
-       int ret;
-       cpu_set_t cpuset;
-       pthread_t thread;
-
-       thread = pthread_self();
-
-       CPU_ZERO(&cpuset);
-       CPU_SET(new_pcpu, &cpuset);
-
-       pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
-       ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
-       TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
-                   new_pcpu, ret);
-}
-
-static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
-                          enum arch_timer timer)
-{
-       if (timer == PHYSICAL)
-               vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
-       else
-               vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
-}
-
-static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
-{
-       enum sync_cmd cmd = uc->args[1];
-       uint64_t val = uc->args[2];
-       enum arch_timer timer = uc->args[3];
-
-       switch (cmd) {
-       case SET_COUNTER_VALUE:
-               kvm_set_cntxct(vcpu, val, timer);
-               break;
-       case USERSPACE_USLEEP:
-               usleep(val);
-               break;
-       case USERSPACE_SCHED_YIELD:
-               sched_yield();
-               break;
-       case USERSPACE_MIGRATE_SELF:
-               migrate_self(next_pcpu());
-               break;
-       default:
-               break;
-       }
-}
-
-static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       /* Start on CPU 0 */
-       migrate_self(0);
-
-       while (true) {
-               vcpu_run(vcpu);
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       handle_sync(vcpu, &uc);
-                       break;
-               case UCALL_DONE:
-                       goto out;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       goto out;
-               default:
-                       TEST_FAIL("Unexpected guest exit\n");
-               }
-       }
-
- out:
-       return;
-}
-
-static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
-       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
-                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
-
-       sync_global_to_guest(vm, ptimer_irq);
-       sync_global_to_guest(vm, vtimer_irq);
-
-       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
-}
-
-static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
-                          enum arch_timer timer)
-{
-       *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       TEST_ASSERT(*vm, "Failed to create the test VM\n");
-
-       vm_init_descriptor_tables(*vm);
-       vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
-                                    guest_irq_handler);
-
-       vcpu_init_descriptor_tables(*vcpu);
-       vcpu_args_set(*vcpu, 1, timer);
-
-       test_init_timer_irq(*vm, *vcpu);
-       vgic_v3_setup(*vm, 1, 64);
-       sync_global_to_guest(*vm, test_args);
-}
-
-static void test_print_help(char *name)
-{
-       pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
-               name);
-       pr_info("\t-i: Number of iterations (default: %u)\n",
-               NR_TEST_ITERS_DEF);
-       pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
-       pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
-               LONG_WAIT_TEST_MS);
-       pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
-               WAIT_TEST_MS);
-       pr_info("\t-p: Test physical timer (default: true)\n");
-       pr_info("\t-v: Test virtual timer (default: true)\n");
-       pr_info("\t-h: Print this help message\n");
-}
-
-static bool parse_args(int argc, char *argv[])
-{
-       int opt;
-
-       while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
-               switch (opt) {
-               case 'b':
-                       test_args.test_physical = true;
-                       test_args.test_virtual = true;
-                       break;
-               case 'i':
-                       test_args.iterations =
-                           atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'l':
-                       test_args.long_wait_ms =
-                           atoi_positive("Long wait time", optarg);
-                       break;
-               case 'p':
-                       test_args.test_physical = true;
-                       test_args.test_virtual = false;
-                       break;
-               case 'v':
-                       test_args.test_virtual = true;
-                       test_args.test_physical = false;
-                       break;
-               case 'w':
-                       test_args.wait_ms = atoi_positive("Wait time", optarg);
-                       break;
-               case 'h':
-               default:
-                       goto err;
-               }
-       }
-
-       return true;
-
- err:
-       test_print_help(argv[0]);
-       return false;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Tell stdout not to buffer its content */
-       setbuf(stdout, NULL);
-
-       if (!parse_args(argc, argv))
-               exit(KSFT_SKIP);
-
-       if (test_args.test_virtual) {
-               test_vm_create(&vm, &vcpu, VIRTUAL);
-               test_run(vm, vcpu);
-               kvm_vm_free(vm);
-       }
-
-       if (test_args.test_physical) {
-               test_vm_create(&vm, &vcpu, PHYSICAL);
-               test_run(vm, vcpu);
-               kvm_vm_free(vm);
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
deleted file mode 100644 (file)
index c7fb55c..0000000
+++ /dev/null
@@ -1,607 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <linux/bitfield.h>
-
-#define MDSCR_KDE      (1 << 13)
-#define MDSCR_MDE      (1 << 15)
-#define MDSCR_SS       (1 << 0)
-
-#define DBGBCR_LEN8    (0xff << 5)
-#define DBGBCR_EXEC    (0x0 << 3)
-#define DBGBCR_EL1     (0x1 << 1)
-#define DBGBCR_E       (0x1 << 0)
-#define DBGBCR_LBN_SHIFT       16
-#define DBGBCR_BT_SHIFT                20
-#define DBGBCR_BT_ADDR_LINK_CTX        (0x1 << DBGBCR_BT_SHIFT)
-#define DBGBCR_BT_CTX_LINK     (0x3 << DBGBCR_BT_SHIFT)
-
-#define DBGWCR_LEN8    (0xff << 5)
-#define DBGWCR_RD      (0x1 << 3)
-#define DBGWCR_WR      (0x2 << 3)
-#define DBGWCR_EL1     (0x1 << 1)
-#define DBGWCR_E       (0x1 << 0)
-#define DBGWCR_LBN_SHIFT       16
-#define DBGWCR_WT_SHIFT                20
-#define DBGWCR_WT_LINK         (0x1 << DBGWCR_WT_SHIFT)
-
-#define SPSR_D         (1 << 9)
-#define SPSR_SS                (1 << 21)
-
-extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
-extern unsigned char iter_ss_begin, iter_ss_end;
-static volatile uint64_t sw_bp_addr, hw_bp_addr;
-static volatile uint64_t wp_addr, wp_data_addr;
-static volatile uint64_t svc_addr;
-static volatile uint64_t ss_addr[4], ss_idx;
-#define  PC(v)  ((uint64_t)&(v))
-
-#define GEN_DEBUG_WRITE_REG(reg_name)                  \
-static void write_##reg_name(int num, uint64_t val)    \
-{                                                      \
-       switch (num) {                                  \
-       case 0:                                         \
-               write_sysreg(val, reg_name##0_el1);     \
-               break;                                  \
-       case 1:                                         \
-               write_sysreg(val, reg_name##1_el1);     \
-               break;                                  \
-       case 2:                                         \
-               write_sysreg(val, reg_name##2_el1);     \
-               break;                                  \
-       case 3:                                         \
-               write_sysreg(val, reg_name##3_el1);     \
-               break;                                  \
-       case 4:                                         \
-               write_sysreg(val, reg_name##4_el1);     \
-               break;                                  \
-       case 5:                                         \
-               write_sysreg(val, reg_name##5_el1);     \
-               break;                                  \
-       case 6:                                         \
-               write_sysreg(val, reg_name##6_el1);     \
-               break;                                  \
-       case 7:                                         \
-               write_sysreg(val, reg_name##7_el1);     \
-               break;                                  \
-       case 8:                                         \
-               write_sysreg(val, reg_name##8_el1);     \
-               break;                                  \
-       case 9:                                         \
-               write_sysreg(val, reg_name##9_el1);     \
-               break;                                  \
-       case 10:                                        \
-               write_sysreg(val, reg_name##10_el1);    \
-               break;                                  \
-       case 11:                                        \
-               write_sysreg(val, reg_name##11_el1);    \
-               break;                                  \
-       case 12:                                        \
-               write_sysreg(val, reg_name##12_el1);    \
-               break;                                  \
-       case 13:                                        \
-               write_sysreg(val, reg_name##13_el1);    \
-               break;                                  \
-       case 14:                                        \
-               write_sysreg(val, reg_name##14_el1);    \
-               break;                                  \
-       case 15:                                        \
-               write_sysreg(val, reg_name##15_el1);    \
-               break;                                  \
-       default:                                        \
-               GUEST_ASSERT(0);                        \
-       }                                               \
-}
-
-/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
-GEN_DEBUG_WRITE_REG(dbgbcr)
-GEN_DEBUG_WRITE_REG(dbgbvr)
-GEN_DEBUG_WRITE_REG(dbgwcr)
-GEN_DEBUG_WRITE_REG(dbgwvr)
-
-static void reset_debug_state(void)
-{
-       uint8_t brps, wrps, i;
-       uint64_t dfr0;
-
-       asm volatile("msr daifset, #8");
-
-       write_sysreg(0, osdlr_el1);
-       write_sysreg(0, oslar_el1);
-       isb();
-
-       write_sysreg(0, mdscr_el1);
-       write_sysreg(0, contextidr_el1);
-
-       /* Reset all bcr/bvr/wcr/wvr registers */
-       dfr0 = read_sysreg(id_aa64dfr0_el1);
-       brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
-       for (i = 0; i <= brps; i++) {
-               write_dbgbcr(i, 0);
-               write_dbgbvr(i, 0);
-       }
-       wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
-       for (i = 0; i <= wrps; i++) {
-               write_dbgwcr(i, 0);
-               write_dbgwvr(i, 0);
-       }
-
-       isb();
-}
-
-static void enable_os_lock(void)
-{
-       write_sysreg(1, oslar_el1);
-       isb();
-
-       GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
-}
-
-static void enable_monitor_debug_exceptions(void)
-{
-       uint32_t mdscr;
-
-       asm volatile("msr daifclr, #8");
-
-       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-       write_sysreg(mdscr, mdscr_el1);
-       isb();
-}
-
-static void install_wp(uint8_t wpn, uint64_t addr)
-{
-       uint32_t wcr;
-
-       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
-       write_dbgwcr(wpn, wcr);
-       write_dbgwvr(wpn, addr);
-
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_hw_bp(uint8_t bpn, uint64_t addr)
-{
-       uint32_t bcr;
-
-       bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
-       write_dbgbcr(bpn, bcr);
-       write_dbgbvr(bpn, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
-                          uint64_t ctx)
-{
-       uint32_t wcr;
-       uint64_t ctx_bcr;
-
-       /* Setup a context-aware breakpoint for Linked Context ID Match */
-       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                 DBGBCR_BT_CTX_LINK;
-       write_dbgbcr(ctx_bp, ctx_bcr);
-       write_dbgbvr(ctx_bp, ctx);
-
-       /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
-       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
-             DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
-       write_dbgwcr(addr_wp, wcr);
-       write_dbgwvr(addr_wp, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
-                      uint64_t ctx)
-{
-       uint32_t addr_bcr, ctx_bcr;
-
-       /* Setup a context-aware breakpoint for Linked Context ID Match */
-       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                 DBGBCR_BT_CTX_LINK;
-       write_dbgbcr(ctx_bp, ctx_bcr);
-       write_dbgbvr(ctx_bp, ctx);
-
-       /*
-        * Setup a normal breakpoint for Linked Address Match, and link it
-        * to the context-aware breakpoint.
-        */
-       addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
-                  DBGBCR_BT_ADDR_LINK_CTX |
-                  ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
-       write_dbgbcr(addr_bp, addr_bcr);
-       write_dbgbvr(addr_bp, addr);
-       isb();
-
-       enable_monitor_debug_exceptions();
-}
-
-static void install_ss(void)
-{
-       uint32_t mdscr;
-
-       asm volatile("msr daifclr, #8");
-
-       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
-       write_sysreg(mdscr, mdscr_el1);
-       isb();
-}
-
-static volatile char write_data;
-
-static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
-       uint64_t ctx = 0xabcdef;        /* a random context number */
-
-       /* Software-breakpoint */
-       reset_debug_state();
-       asm volatile("sw_bp: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
-
-       /* Hardware-breakpoint */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(hw_bp));
-       asm volatile("hw_bp: nop");
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
-
-       /* Hardware-breakpoint + svc */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(bp_svc));
-       asm volatile("bp_svc: svc #0");
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
-       GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
-
-       /* Hardware-breakpoint + software-breakpoint */
-       reset_debug_state();
-       install_hw_bp(bpn, PC(bp_brk));
-       asm volatile("bp_brk: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
-
-       /* Watchpoint */
-       reset_debug_state();
-       install_wp(wpn, PC(write_data));
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
-       /* Single-step */
-       reset_debug_state();
-       install_ss();
-       ss_idx = 0;
-       asm volatile("ss_start:\n"
-                    "mrs x0, esr_el1\n"
-                    "add x0, x0, #1\n"
-                    "msr daifset, #8\n"
-                    : : : "x0");
-       GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
-       GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
-       GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
-
-       /* OS Lock does not block software-breakpoint */
-       reset_debug_state();
-       enable_os_lock();
-       sw_bp_addr = 0;
-       asm volatile("sw_bp2: brk #0");
-       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
-
-       /* OS Lock blocking hardware-breakpoint */
-       reset_debug_state();
-       enable_os_lock();
-       install_hw_bp(bpn, PC(hw_bp2));
-       hw_bp_addr = 0;
-       asm volatile("hw_bp2: nop");
-       GUEST_ASSERT_EQ(hw_bp_addr, 0);
-
-       /* OS Lock blocking watchpoint */
-       reset_debug_state();
-       enable_os_lock();
-       write_data = '\0';
-       wp_data_addr = 0;
-       install_wp(wpn, PC(write_data));
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, 0);
-
-       /* OS Lock blocking single-step */
-       reset_debug_state();
-       enable_os_lock();
-       ss_addr[0] = 0;
-       install_ss();
-       ss_idx = 0;
-       asm volatile("mrs x0, esr_el1\n\t"
-                    "add x0, x0, #1\n\t"
-                    "msr daifset, #8\n\t"
-                    : : : "x0");
-       GUEST_ASSERT_EQ(ss_addr[0], 0);
-
-       /* Linked hardware-breakpoint */
-       hw_bp_addr = 0;
-       reset_debug_state();
-       install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
-       /* Set context id */
-       write_sysreg(ctx, contextidr_el1);
-       isb();
-       asm volatile("hw_bp_ctx: nop");
-       write_sysreg(0, contextidr_el1);
-       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
-
-       /* Linked watchpoint */
-       reset_debug_state();
-       install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
-       /* Set context id */
-       write_sysreg(ctx, contextidr_el1);
-       isb();
-       write_data = 'x';
-       GUEST_ASSERT_EQ(write_data, 'x');
-       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
-
-       GUEST_DONE();
-}
-
-static void guest_sw_bp_handler(struct ex_regs *regs)
-{
-       sw_bp_addr = regs->pc;
-       regs->pc += 4;
-}
-
-static void guest_hw_bp_handler(struct ex_regs *regs)
-{
-       hw_bp_addr = regs->pc;
-       regs->pstate |= SPSR_D;
-}
-
-static void guest_wp_handler(struct ex_regs *regs)
-{
-       wp_data_addr = read_sysreg(far_el1);
-       wp_addr = regs->pc;
-       regs->pstate |= SPSR_D;
-}
-
-static void guest_ss_handler(struct ex_regs *regs)
-{
-       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
-       ss_addr[ss_idx++] = regs->pc;
-       regs->pstate |= SPSR_SS;
-}
-
-static void guest_svc_handler(struct ex_regs *regs)
-{
-       svc_addr = regs->pc;
-}
-
-static void guest_code_ss(int test_cnt)
-{
-       uint64_t i;
-       uint64_t bvr, wvr, w_bvr, w_wvr;
-
-       for (i = 0; i < test_cnt; i++) {
-               /* Bits [1:0] of dbg{b,w}vr are RES0 */
-               w_bvr = i << 2;
-               w_wvr = i << 2;
-
-               /*
-                * Enable Single Step execution.  Note!  This _must_ be a bare
-                * ucall as the ucall() path uses atomic operations to manage
-                * the ucall structures, and the built-in "atomics" are usually
-                * implemented via exclusive access instructions.  The exlusive
-                * implemented via exclusive access instructions.  The exclusive
-                * during a LDREX=>STREX sequence will prevent forward progress
-                * and hang the guest/test.
-                */
-               GUEST_UCALL_NONE();
-
-               /*
-                * Userspace will verify that the pc is as expected during
-                * single-step execution between iter_ss_begin and iter_ss_end.
-                */
-               asm volatile("iter_ss_begin:nop\n");
-
-               write_sysreg(w_bvr, dbgbvr0_el1);
-               write_sysreg(w_wvr, dbgwvr0_el1);
-               bvr = read_sysreg(dbgbvr0_el1);
-               wvr = read_sysreg(dbgwvr0_el1);
-
-               /* Userspace disables Single Step when the end is nigh. */
-               asm volatile("iter_ss_end:\n");
-
-               GUEST_ASSERT_EQ(bvr, w_bvr);
-               GUEST_ASSERT_EQ(wvr, w_wvr);
-       }
-       GUEST_DONE();
-}
-
-static int debug_version(uint64_t id_aa64dfr0)
-{
-       return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
-}
-
-static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_BRK64, guest_sw_bp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_SVC64, guest_svc_handler);
-
-       /* Specify bpn/wpn/ctx_bpn to be tested */
-       vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
-       pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
-
-void test_single_step_from_userspace(int test_cnt)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       struct kvm_run *run;
-       uint64_t pc, cmd;
-       uint64_t test_pc = 0;
-       bool ss_enable = false;
-       struct kvm_guest_debug debug = {};
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
-       run = vcpu->run;
-       vcpu_args_set(vcpu, 1, test_cnt);
-
-       while (1) {
-               vcpu_run(vcpu);
-               if (run->exit_reason != KVM_EXIT_DEBUG) {
-                       cmd = get_ucall(vcpu, &uc);
-                       if (cmd == UCALL_ABORT) {
-                               REPORT_GUEST_ASSERT(uc);
-                               /* NOT REACHED */
-                       } else if (cmd == UCALL_DONE) {
-                               break;
-                       }
-
-                       TEST_ASSERT(cmd == UCALL_NONE,
-                                   "Unexpected ucall cmd 0x%lx", cmd);
-
-                       debug.control = KVM_GUESTDBG_ENABLE |
-                                       KVM_GUESTDBG_SINGLESTEP;
-                       ss_enable = true;
-                       vcpu_guest_debug_set(vcpu, &debug);
-                       continue;
-               }
-
-               TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
-
-               /* Check if the current pc is expected. */
-               pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-               TEST_ASSERT(!test_pc || pc == test_pc,
-                           "Unexpected pc 0x%lx (expected 0x%lx)",
-                           pc, test_pc);
-
-               if ((pc + 4) == (uint64_t)&iter_ss_end) {
-                       test_pc = 0;
-                       debug.control = KVM_GUESTDBG_ENABLE;
-                       ss_enable = false;
-                       vcpu_guest_debug_set(vcpu, &debug);
-                       continue;
-               }
-
-               /*
-                * If the current pc is between iter_ss_begin and
-                * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
-                * be the current pc + 4.
-                */
-               if ((pc >= (uint64_t)&iter_ss_begin) &&
-                   (pc < (uint64_t)&iter_ss_end))
-                       test_pc = pc + 4;
-               else
-                       test_pc = 0;
-       }
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Run debug testing using the various breakpoint#, watchpoint# and
- * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
- */
-void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
-{
-       uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
-       int b, w, c;
-
-       /* Number of breakpoints */
-       brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
-       __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
-
-       /* Number of watchpoints */
-       wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
-
-       /* Number of context aware breakpoints */
-       ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
-
-       pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
-                brp_num, wrp_num, ctx_brp_num);
-
-       /* Number of normal (non-context aware) breakpoints */
-       normal_brp_num = brp_num - ctx_brp_num;
-
-       /* Lowest context aware breakpoint number */
-       ctx_brp_base = normal_brp_num;
-
-       /* Run tests with all supported breakpoints/watchpoints */
-       for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
-               for (b = 0; b < normal_brp_num; b++) {
-                       for (w = 0; w < wrp_num; w++)
-                               test_guest_debug_exceptions(b, w, c);
-               }
-       }
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
-       puts("");
-       exit(0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int opt;
-       int ss_iteration = 10000;
-       uint64_t aa64dfr0;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
-       __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
-                      "Armv8 debug architecture not supported.");
-       kvm_vm_free(vm);
-
-       while ((opt = getopt(argc, argv, "i:")) != -1) {
-               switch (opt) {
-               case 'i':
-                       ss_iteration = atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       test_guest_debug_exceptions_all(aa64dfr0);
-       test_single_step_from_userspace(ss_iteration);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
deleted file mode 100644 (file)
index d43fb3f..0000000
+++ /dev/null
@@ -1,771 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Check for KVM_GET_REG_LIST regressions.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * While the blessed list should be created from the oldest possible
- * kernel, we can't go older than v5.2, because that's the first
- * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
- * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
- * registers won't match expectations.
- */
-#include <stdio.h>
-#include "kvm_util.h"
-#include "test_util.h"
-#include "processor.h"
-
-struct feature_id_reg {
-       __u64 reg;
-       __u64 id_reg;
-       __u64 feat_shift;
-       __u64 feat_min;
-};
-
-static struct feature_id_reg feat_id_regs[] = {
-       {
-               ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               0,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               8,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               8,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               16,
-               1
-       },
-       {
-               ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
-               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-               16,
-               1
-       }
-};
-
-bool filter_reg(__u64 reg)
-{
-       /*
-        * DEMUX register presence depends on the host's CLIDR_EL1.
-        * This means there's no set of them that we can bless.
-        */
-       if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
-               return true;
-
-       return false;
-}
-
-static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
-       int i, ret;
-       __u64 data, feat_val;
-
-       for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
-               if (feat_id_regs[i].reg == reg) {
-                       ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
-                       if (ret < 0)
-                               return false;
-
-                       feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
-                       return feat_val >= feat_id_regs[i].feat_min;
-               }
-       }
-
-       return true;
-}
-
-bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
-{
-       return check_supported_feat_reg(vcpu, reg);
-}
-
-bool check_reject_set(int err)
-{
-       return err == EPERM;
-}
-
-void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
-{
-       struct vcpu_reg_sublist *s;
-       int feature;
-
-       for_each_sublist(c, s) {
-               if (s->finalize) {
-                       feature = s->feature;
-                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
-               }
-       }
-}
-
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
-
-#define CORE_REGS_XX_NR_WORDS  2
-#define CORE_SPSR_XX_NR_WORDS  2
-#define CORE_FPREGS_XX_NR_WORDS        4
-
-static const char *core_id_to_str(const char *prefix, __u64 id)
-{
-       __u64 core_off = id & ~REG_MASK, idx;
-
-       /*
-        * core_off is the offset into struct kvm_regs
-        */
-       switch (core_off) {
-       case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
-            KVM_REG_ARM_CORE_REG(regs.regs[30]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(regs.sp):
-               return "KVM_REG_ARM_CORE_REG(regs.sp)";
-       case KVM_REG_ARM_CORE_REG(regs.pc):
-               return "KVM_REG_ARM_CORE_REG(regs.pc)";
-       case KVM_REG_ARM_CORE_REG(regs.pstate):
-               return "KVM_REG_ARM_CORE_REG(regs.pstate)";
-       case KVM_REG_ARM_CORE_REG(sp_el1):
-               return "KVM_REG_ARM_CORE_REG(sp_el1)";
-       case KVM_REG_ARM_CORE_REG(elr_el1):
-               return "KVM_REG_ARM_CORE_REG(elr_el1)";
-       case KVM_REG_ARM_CORE_REG(spsr[0]) ...
-            KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
-            KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
-               idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
-               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
-       case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
-               return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
-       case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
-               return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
-       }
-
-       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
-       return NULL;
-}
-
-static const char *sve_id_to_str(const char *prefix, __u64 id)
-{
-       __u64 sve_off, n, i;
-
-       if (id == KVM_REG_ARM64_SVE_VLS)
-               return "KVM_REG_ARM64_SVE_VLS";
-
-       sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
-       i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
-
-       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
-
-       switch (sve_off) {
-       case KVM_REG_ARM64_SVE_ZREG_BASE ...
-            KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
-               n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
-               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
-       case KVM_REG_ARM64_SVE_PREG_BASE ...
-            KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
-               n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
-               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
-       case KVM_REG_ARM64_SVE_FFR_BASE:
-               TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
-               return "KVM_REG_ARM64_SVE_FFR(0)";
-       }
-
-       return NULL;
-}
-
-void print_reg(const char *prefix, __u64 id)
-{
-       unsigned op0, op1, crn, crm, op2;
-       const char *reg_size = NULL;
-
-       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
-
-       switch (id & KVM_REG_SIZE_MASK) {
-       case KVM_REG_SIZE_U8:
-               reg_size = "KVM_REG_SIZE_U8";
-               break;
-       case KVM_REG_SIZE_U16:
-               reg_size = "KVM_REG_SIZE_U16";
-               break;
-       case KVM_REG_SIZE_U32:
-               reg_size = "KVM_REG_SIZE_U32";
-               break;
-       case KVM_REG_SIZE_U64:
-               reg_size = "KVM_REG_SIZE_U64";
-               break;
-       case KVM_REG_SIZE_U128:
-               reg_size = "KVM_REG_SIZE_U128";
-               break;
-       case KVM_REG_SIZE_U256:
-               reg_size = "KVM_REG_SIZE_U256";
-               break;
-       case KVM_REG_SIZE_U512:
-               reg_size = "KVM_REG_SIZE_U512";
-               break;
-       case KVM_REG_SIZE_U1024:
-               reg_size = "KVM_REG_SIZE_U1024";
-               break;
-       case KVM_REG_SIZE_U2048:
-               reg_size = "KVM_REG_SIZE_U2048";
-               break;
-       default:
-               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
-                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
-       }
-
-       switch (id & KVM_REG_ARM_COPROC_MASK) {
-       case KVM_REG_ARM_CORE:
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
-               break;
-       case KVM_REG_ARM_DEMUX:
-               TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
-                      reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
-               break;
-       case KVM_REG_ARM64_SYSREG:
-               op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
-               op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
-               crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
-               crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
-               op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
-               TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
-               printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
-               break;
-       case KVM_REG_ARM_FW:
-               TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
-               break;
-       case KVM_REG_ARM_FW_FEAT_BMAP:
-               TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
-                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
-               printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
-               break;
-       case KVM_REG_ARM64_SVE:
-               printf("\t%s,\n", sve_id_to_str(prefix, id));
-               break;
-       default:
-               TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
-       }
-}
-
-/*
- * The original blessed list was primed with the output of kernel version
- * v4.15 with --core-reg-fixup and then later updated with new registers.
- * (The --core-reg-fixup option and its fixup function have been removed
- * from the test, as it's unlikely this type of test will be run on a
- * kernel older than v5.2.)
- *
- * The blessed list is up to date with kernel version v6.4 (or so we hope).
- */
-static __u64 base_regs[] = {
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
-       KVM_REG_ARM_FW_REG(0),          /* KVM_REG_ARM_PSCI_VERSION */
-       KVM_REG_ARM_FW_REG(1),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
-       KVM_REG_ARM_FW_REG(2),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
-       KVM_REG_ARM_FW_REG(3),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(0),        /* KVM_REG_ARM_STD_BMAP */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(1),        /* KVM_REG_ARM_STD_HYP_BMAP */
-       KVM_REG_ARM_FW_FEAT_BMAP_REG(2),        /* KVM_REG_ARM_VENDOR_HYP_BMAP */
-       ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 0, 2),
-       ARM64_SYS_REG(3, 0, 0, 0, 0),   /* MIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 0, 6),   /* REVIDR_EL1 */
-       ARM64_SYS_REG(3, 1, 0, 0, 1),   /* CLIDR_EL1 */
-       ARM64_SYS_REG(3, 1, 0, 0, 7),   /* AIDR_EL1 */
-       ARM64_SYS_REG(3, 3, 0, 0, 1),   /* CTR_EL0 */
-       ARM64_SYS_REG(2, 0, 0, 0, 4),
-       ARM64_SYS_REG(2, 0, 0, 0, 5),
-       ARM64_SYS_REG(2, 0, 0, 0, 6),
-       ARM64_SYS_REG(2, 0, 0, 0, 7),
-       ARM64_SYS_REG(2, 0, 0, 1, 4),
-       ARM64_SYS_REG(2, 0, 0, 1, 5),
-       ARM64_SYS_REG(2, 0, 0, 1, 6),
-       ARM64_SYS_REG(2, 0, 0, 1, 7),
-       ARM64_SYS_REG(2, 0, 0, 2, 0),   /* MDCCINT_EL1 */
-       ARM64_SYS_REG(2, 0, 0, 2, 2),   /* MDSCR_EL1 */
-       ARM64_SYS_REG(2, 0, 0, 2, 4),
-       ARM64_SYS_REG(2, 0, 0, 2, 5),
-       ARM64_SYS_REG(2, 0, 0, 2, 6),
-       ARM64_SYS_REG(2, 0, 0, 2, 7),
-       ARM64_SYS_REG(2, 0, 0, 3, 4),
-       ARM64_SYS_REG(2, 0, 0, 3, 5),
-       ARM64_SYS_REG(2, 0, 0, 3, 6),
-       ARM64_SYS_REG(2, 0, 0, 3, 7),
-       ARM64_SYS_REG(2, 0, 0, 4, 4),
-       ARM64_SYS_REG(2, 0, 0, 4, 5),
-       ARM64_SYS_REG(2, 0, 0, 4, 6),
-       ARM64_SYS_REG(2, 0, 0, 4, 7),
-       ARM64_SYS_REG(2, 0, 0, 5, 4),
-       ARM64_SYS_REG(2, 0, 0, 5, 5),
-       ARM64_SYS_REG(2, 0, 0, 5, 6),
-       ARM64_SYS_REG(2, 0, 0, 5, 7),
-       ARM64_SYS_REG(2, 0, 0, 6, 4),
-       ARM64_SYS_REG(2, 0, 0, 6, 5),
-       ARM64_SYS_REG(2, 0, 0, 6, 6),
-       ARM64_SYS_REG(2, 0, 0, 6, 7),
-       ARM64_SYS_REG(2, 0, 0, 7, 4),
-       ARM64_SYS_REG(2, 0, 0, 7, 5),
-       ARM64_SYS_REG(2, 0, 0, 7, 6),
-       ARM64_SYS_REG(2, 0, 0, 7, 7),
-       ARM64_SYS_REG(2, 0, 0, 8, 4),
-       ARM64_SYS_REG(2, 0, 0, 8, 5),
-       ARM64_SYS_REG(2, 0, 0, 8, 6),
-       ARM64_SYS_REG(2, 0, 0, 8, 7),
-       ARM64_SYS_REG(2, 0, 0, 9, 4),
-       ARM64_SYS_REG(2, 0, 0, 9, 5),
-       ARM64_SYS_REG(2, 0, 0, 9, 6),
-       ARM64_SYS_REG(2, 0, 0, 9, 7),
-       ARM64_SYS_REG(2, 0, 0, 10, 4),
-       ARM64_SYS_REG(2, 0, 0, 10, 5),
-       ARM64_SYS_REG(2, 0, 0, 10, 6),
-       ARM64_SYS_REG(2, 0, 0, 10, 7),
-       ARM64_SYS_REG(2, 0, 0, 11, 4),
-       ARM64_SYS_REG(2, 0, 0, 11, 5),
-       ARM64_SYS_REG(2, 0, 0, 11, 6),
-       ARM64_SYS_REG(2, 0, 0, 11, 7),
-       ARM64_SYS_REG(2, 0, 0, 12, 4),
-       ARM64_SYS_REG(2, 0, 0, 12, 5),
-       ARM64_SYS_REG(2, 0, 0, 12, 6),
-       ARM64_SYS_REG(2, 0, 0, 12, 7),
-       ARM64_SYS_REG(2, 0, 0, 13, 4),
-       ARM64_SYS_REG(2, 0, 0, 13, 5),
-       ARM64_SYS_REG(2, 0, 0, 13, 6),
-       ARM64_SYS_REG(2, 0, 0, 13, 7),
-       ARM64_SYS_REG(2, 0, 0, 14, 4),
-       ARM64_SYS_REG(2, 0, 0, 14, 5),
-       ARM64_SYS_REG(2, 0, 0, 14, 6),
-       ARM64_SYS_REG(2, 0, 0, 14, 7),
-       ARM64_SYS_REG(2, 0, 0, 15, 4),
-       ARM64_SYS_REG(2, 0, 0, 15, 5),
-       ARM64_SYS_REG(2, 0, 0, 15, 6),
-       ARM64_SYS_REG(2, 0, 0, 15, 7),
-       ARM64_SYS_REG(2, 0, 1, 1, 4),   /* OSLSR_EL1 */
-       ARM64_SYS_REG(2, 4, 0, 7, 0),   /* DBGVCR32_EL2 */
-       ARM64_SYS_REG(3, 0, 0, 0, 5),   /* MPIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 0),   /* ID_PFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 1),   /* ID_PFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 2),   /* ID_DFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 3),   /* ID_AFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 4),   /* ID_MMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 5),   /* ID_MMFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 6),   /* ID_MMFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 1, 7),   /* ID_MMFR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 0),   /* ID_ISAR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 1),   /* ID_ISAR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 2),   /* ID_ISAR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 3),   /* ID_ISAR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 4),   /* ID_ISAR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 5),   /* ID_ISAR5_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 6),   /* ID_MMFR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 2, 7),   /* ID_ISAR6_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 0),   /* MVFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 1),   /* MVFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 2),   /* MVFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 3),
-       ARM64_SYS_REG(3, 0, 0, 3, 4),   /* ID_PFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 5),   /* ID_DFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 6),   /* ID_MMFR5_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 3, 7),
-       ARM64_SYS_REG(3, 0, 0, 4, 0),   /* ID_AA64PFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 1),   /* ID_AA64PFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 2),   /* ID_AA64PFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 3),
-       ARM64_SYS_REG(3, 0, 0, 4, 4),   /* ID_AA64ZFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 5),   /* ID_AA64SMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 4, 6),
-       ARM64_SYS_REG(3, 0, 0, 4, 7),
-       ARM64_SYS_REG(3, 0, 0, 5, 0),   /* ID_AA64DFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 1),   /* ID_AA64DFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 2),
-       ARM64_SYS_REG(3, 0, 0, 5, 3),
-       ARM64_SYS_REG(3, 0, 0, 5, 4),   /* ID_AA64AFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 5),   /* ID_AA64AFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 5, 6),
-       ARM64_SYS_REG(3, 0, 0, 5, 7),
-       ARM64_SYS_REG(3, 0, 0, 6, 0),   /* ID_AA64ISAR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 1),   /* ID_AA64ISAR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 2),   /* ID_AA64ISAR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 6, 3),
-       ARM64_SYS_REG(3, 0, 0, 6, 4),
-       ARM64_SYS_REG(3, 0, 0, 6, 5),
-       ARM64_SYS_REG(3, 0, 0, 6, 6),
-       ARM64_SYS_REG(3, 0, 0, 6, 7),
-       ARM64_SYS_REG(3, 0, 0, 7, 0),   /* ID_AA64MMFR0_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 1),   /* ID_AA64MMFR1_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 2),   /* ID_AA64MMFR2_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 4),   /* ID_AA64MMFR4_EL1 */
-       ARM64_SYS_REG(3, 0, 0, 7, 5),
-       ARM64_SYS_REG(3, 0, 0, 7, 6),
-       ARM64_SYS_REG(3, 0, 0, 7, 7),
-       ARM64_SYS_REG(3, 0, 1, 0, 0),   /* SCTLR_EL1 */
-       ARM64_SYS_REG(3, 0, 1, 0, 1),   /* ACTLR_EL1 */
-       ARM64_SYS_REG(3, 0, 1, 0, 2),   /* CPACR_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 0),   /* TTBR0_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 1),   /* TTBR1_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 2),   /* TCR_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 1, 0),   /* AFSR0_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 1, 1),   /* AFSR1_EL1 */
-       ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
-       ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
-       ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
-       ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
-       ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
-       ARM64_SYS_REG(3, 0, 12, 1, 1),  /* DISR_EL1 */
-       ARM64_SYS_REG(3, 0, 13, 0, 1),  /* CONTEXTIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
-       ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
-       ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
-       ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
-       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 0, 1),  /* CNTPCT_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 2, 1),  /* CNTP_CTL_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 2, 2),  /* CNTP_CVAL_EL0 */
-       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
-       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
-};
-
-static __u64 pmu_regs[] = {
-       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
-       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
-       ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 3),  /* PMOVSCLR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 4),  /* PMSWINC_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 12, 5),  /* PMSELR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
-       ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
-       ARM64_SYS_REG(3, 3, 14, 8, 0),
-       ARM64_SYS_REG(3, 3, 14, 8, 1),
-       ARM64_SYS_REG(3, 3, 14, 8, 2),
-       ARM64_SYS_REG(3, 3, 14, 8, 3),
-       ARM64_SYS_REG(3, 3, 14, 8, 4),
-       ARM64_SYS_REG(3, 3, 14, 8, 5),
-       ARM64_SYS_REG(3, 3, 14, 8, 6),
-       ARM64_SYS_REG(3, 3, 14, 8, 7),
-       ARM64_SYS_REG(3, 3, 14, 9, 0),
-       ARM64_SYS_REG(3, 3, 14, 9, 1),
-       ARM64_SYS_REG(3, 3, 14, 9, 2),
-       ARM64_SYS_REG(3, 3, 14, 9, 3),
-       ARM64_SYS_REG(3, 3, 14, 9, 4),
-       ARM64_SYS_REG(3, 3, 14, 9, 5),
-       ARM64_SYS_REG(3, 3, 14, 9, 6),
-       ARM64_SYS_REG(3, 3, 14, 9, 7),
-       ARM64_SYS_REG(3, 3, 14, 10, 0),
-       ARM64_SYS_REG(3, 3, 14, 10, 1),
-       ARM64_SYS_REG(3, 3, 14, 10, 2),
-       ARM64_SYS_REG(3, 3, 14, 10, 3),
-       ARM64_SYS_REG(3, 3, 14, 10, 4),
-       ARM64_SYS_REG(3, 3, 14, 10, 5),
-       ARM64_SYS_REG(3, 3, 14, 10, 6),
-       ARM64_SYS_REG(3, 3, 14, 10, 7),
-       ARM64_SYS_REG(3, 3, 14, 11, 0),
-       ARM64_SYS_REG(3, 3, 14, 11, 1),
-       ARM64_SYS_REG(3, 3, 14, 11, 2),
-       ARM64_SYS_REG(3, 3, 14, 11, 3),
-       ARM64_SYS_REG(3, 3, 14, 11, 4),
-       ARM64_SYS_REG(3, 3, 14, 11, 5),
-       ARM64_SYS_REG(3, 3, 14, 11, 6),
-       ARM64_SYS_REG(3, 3, 14, 12, 0),
-       ARM64_SYS_REG(3, 3, 14, 12, 1),
-       ARM64_SYS_REG(3, 3, 14, 12, 2),
-       ARM64_SYS_REG(3, 3, 14, 12, 3),
-       ARM64_SYS_REG(3, 3, 14, 12, 4),
-       ARM64_SYS_REG(3, 3, 14, 12, 5),
-       ARM64_SYS_REG(3, 3, 14, 12, 6),
-       ARM64_SYS_REG(3, 3, 14, 12, 7),
-       ARM64_SYS_REG(3, 3, 14, 13, 0),
-       ARM64_SYS_REG(3, 3, 14, 13, 1),
-       ARM64_SYS_REG(3, 3, 14, 13, 2),
-       ARM64_SYS_REG(3, 3, 14, 13, 3),
-       ARM64_SYS_REG(3, 3, 14, 13, 4),
-       ARM64_SYS_REG(3, 3, 14, 13, 5),
-       ARM64_SYS_REG(3, 3, 14, 13, 6),
-       ARM64_SYS_REG(3, 3, 14, 13, 7),
-       ARM64_SYS_REG(3, 3, 14, 14, 0),
-       ARM64_SYS_REG(3, 3, 14, 14, 1),
-       ARM64_SYS_REG(3, 3, 14, 14, 2),
-       ARM64_SYS_REG(3, 3, 14, 14, 3),
-       ARM64_SYS_REG(3, 3, 14, 14, 4),
-       ARM64_SYS_REG(3, 3, 14, 14, 5),
-       ARM64_SYS_REG(3, 3, 14, 14, 6),
-       ARM64_SYS_REG(3, 3, 14, 14, 7),
-       ARM64_SYS_REG(3, 3, 14, 15, 0),
-       ARM64_SYS_REG(3, 3, 14, 15, 1),
-       ARM64_SYS_REG(3, 3, 14, 15, 2),
-       ARM64_SYS_REG(3, 3, 14, 15, 3),
-       ARM64_SYS_REG(3, 3, 14, 15, 4),
-       ARM64_SYS_REG(3, 3, 14, 15, 5),
-       ARM64_SYS_REG(3, 3, 14, 15, 6),
-       ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
-};
-
-static __u64 vregs[] = {
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
-       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
-};
-
-static __u64 sve_regs[] = {
-       KVM_REG_ARM64_SVE_VLS,
-       KVM_REG_ARM64_SVE_ZREG(0, 0),
-       KVM_REG_ARM64_SVE_ZREG(1, 0),
-       KVM_REG_ARM64_SVE_ZREG(2, 0),
-       KVM_REG_ARM64_SVE_ZREG(3, 0),
-       KVM_REG_ARM64_SVE_ZREG(4, 0),
-       KVM_REG_ARM64_SVE_ZREG(5, 0),
-       KVM_REG_ARM64_SVE_ZREG(6, 0),
-       KVM_REG_ARM64_SVE_ZREG(7, 0),
-       KVM_REG_ARM64_SVE_ZREG(8, 0),
-       KVM_REG_ARM64_SVE_ZREG(9, 0),
-       KVM_REG_ARM64_SVE_ZREG(10, 0),
-       KVM_REG_ARM64_SVE_ZREG(11, 0),
-       KVM_REG_ARM64_SVE_ZREG(12, 0),
-       KVM_REG_ARM64_SVE_ZREG(13, 0),
-       KVM_REG_ARM64_SVE_ZREG(14, 0),
-       KVM_REG_ARM64_SVE_ZREG(15, 0),
-       KVM_REG_ARM64_SVE_ZREG(16, 0),
-       KVM_REG_ARM64_SVE_ZREG(17, 0),
-       KVM_REG_ARM64_SVE_ZREG(18, 0),
-       KVM_REG_ARM64_SVE_ZREG(19, 0),
-       KVM_REG_ARM64_SVE_ZREG(20, 0),
-       KVM_REG_ARM64_SVE_ZREG(21, 0),
-       KVM_REG_ARM64_SVE_ZREG(22, 0),
-       KVM_REG_ARM64_SVE_ZREG(23, 0),
-       KVM_REG_ARM64_SVE_ZREG(24, 0),
-       KVM_REG_ARM64_SVE_ZREG(25, 0),
-       KVM_REG_ARM64_SVE_ZREG(26, 0),
-       KVM_REG_ARM64_SVE_ZREG(27, 0),
-       KVM_REG_ARM64_SVE_ZREG(28, 0),
-       KVM_REG_ARM64_SVE_ZREG(29, 0),
-       KVM_REG_ARM64_SVE_ZREG(30, 0),
-       KVM_REG_ARM64_SVE_ZREG(31, 0),
-       KVM_REG_ARM64_SVE_PREG(0, 0),
-       KVM_REG_ARM64_SVE_PREG(1, 0),
-       KVM_REG_ARM64_SVE_PREG(2, 0),
-       KVM_REG_ARM64_SVE_PREG(3, 0),
-       KVM_REG_ARM64_SVE_PREG(4, 0),
-       KVM_REG_ARM64_SVE_PREG(5, 0),
-       KVM_REG_ARM64_SVE_PREG(6, 0),
-       KVM_REG_ARM64_SVE_PREG(7, 0),
-       KVM_REG_ARM64_SVE_PREG(8, 0),
-       KVM_REG_ARM64_SVE_PREG(9, 0),
-       KVM_REG_ARM64_SVE_PREG(10, 0),
-       KVM_REG_ARM64_SVE_PREG(11, 0),
-       KVM_REG_ARM64_SVE_PREG(12, 0),
-       KVM_REG_ARM64_SVE_PREG(13, 0),
-       KVM_REG_ARM64_SVE_PREG(14, 0),
-       KVM_REG_ARM64_SVE_PREG(15, 0),
-       KVM_REG_ARM64_SVE_FFR(0),
-       ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
-};
-
-static __u64 sve_rejects_set[] = {
-       KVM_REG_ARM64_SVE_VLS,
-};
-
-static __u64 pauth_addr_regs[] = {
-       ARM64_SYS_REG(3, 0, 2, 1, 0),   /* APIAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 1),   /* APIAKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 2),   /* APIBKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 1, 3),   /* APIBKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 0),   /* APDAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 1),   /* APDAKEYHI_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 2),   /* APDBKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 2, 3)    /* APDBKEYHI_EL1 */
-};
-
-static __u64 pauth_generic_regs[] = {
-       ARM64_SYS_REG(3, 0, 2, 3, 0),   /* APGAKEYLO_EL1 */
-       ARM64_SYS_REG(3, 0, 2, 3, 1),   /* APGAKEYHI_EL1 */
-};
-
-#define BASE_SUBLIST \
-       { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
-#define VREGS_SUBLIST \
-       { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
-#define PMU_SUBLIST \
-       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
-         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
-#define SVE_SUBLIST \
-       { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
-         .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
-         .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
-#define PAUTH_SUBLIST                                                  \
-       {                                                               \
-               .name           = "pauth_address",                      \
-               .capability     = KVM_CAP_ARM_PTRAUTH_ADDRESS,          \
-               .feature        = KVM_ARM_VCPU_PTRAUTH_ADDRESS,         \
-               .regs           = pauth_addr_regs,                      \
-               .regs_n         = ARRAY_SIZE(pauth_addr_regs),          \
-       },                                                              \
-       {                                                               \
-               .name           = "pauth_generic",                      \
-               .capability     = KVM_CAP_ARM_PTRAUTH_GENERIC,          \
-               .feature        = KVM_ARM_VCPU_PTRAUTH_GENERIC,         \
-               .regs           = pauth_generic_regs,                   \
-               .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
-       }
-
-static struct vcpu_reg_list vregs_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list vregs_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list sve_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       SVE_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list sve_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       SVE_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list pauth_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PAUTH_SUBLIST,
-       {0},
-       },
-};
-static struct vcpu_reg_list pauth_pmu_config = {
-       .sublists = {
-       BASE_SUBLIST,
-       VREGS_SUBLIST,
-       PAUTH_SUBLIST,
-       PMU_SUBLIST,
-       {0},
-       },
-};
-
-struct vcpu_reg_list *vcpu_configs[] = {
-       &vregs_config,
-       &vregs_pmu_config,
-       &sve_config,
-       &sve_pmu_config,
-       &pauth_config,
-       &pauth_pmu_config,
-};
-int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
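For reference, the blessed arrays above are what the generic get-reg-list harness compares against the output of KVM_GET_REG_LIST. A minimal sketch of that enumeration using raw ioctls, assuming vcpu_fd is an already-open vCPU file descriptor (error handling omitted):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static struct kvm_reg_list *get_reg_list(int vcpu_fd)
{
        struct kvm_reg_list probe = { .n = 0 };
        struct kvm_reg_list *list;

        /* The first call fails with E2BIG but fills in the number of entries. */
        ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);

        list = calloc(1, sizeof(*list) + probe.n * sizeof(__u64));
        list->n = probe.n;
        ioctl(vcpu_fd, KVM_GET_REG_LIST, list);  /* Now fills list->reg[]. */
        return list;
}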
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
deleted file mode 100644 (file)
index ec54ec7..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-/* hypercalls: Check ARM64's pseudo-firmware bitmap register interface.
- *
- * The test validates the basic hypercall functionality that is exposed
- * via the pseudo-firmware bitmap registers. This includes the registers'
- * read/write behavior before and after the VM has started, and whether the
- * hypercalls are properly masked or unmasked to the guest when disabled or
- * enabled from KVM userspace, respectively.
- */
-#include <errno.h>
-#include <linux/arm-smccc.h>
-#include <asm/kvm.h>
-#include <kvm_util.h>
-
-#include "processor.h"
-
-#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
-
-/* Last valid bits of the bitmapped firmware registers */
-#define KVM_REG_ARM_STD_BMAP_BIT_MAX           0
-#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX       0
-#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX    1
-
-struct kvm_fw_reg_info {
-       uint64_t reg;           /* Register definition */
-       uint64_t max_feat_bit;  /* Bit that represents the upper limit of the feature-map */
-};
-
-#define FW_REG_INFO(r)                 \
-       {                                       \
-               .reg = r,                       \
-               .max_feat_bit = r##_BIT_MAX,    \
-       }
-
-static const struct kvm_fw_reg_info fw_reg_info[] = {
-       FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
-       FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
-       FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
-};
-
-enum test_stage {
-       TEST_STAGE_REG_IFACE,
-       TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
-       TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
-       TEST_STAGE_HVC_IFACE_FALSE_INFO,
-       TEST_STAGE_END,
-};
-
-static int stage = TEST_STAGE_REG_IFACE;
-
-struct test_hvc_info {
-       uint32_t func_id;
-       uint64_t arg1;
-};
-
-#define TEST_HVC_INFO(f, a1)   \
-       {                       \
-               .func_id = f,   \
-               .arg1 = a1,     \
-       }
-
-static const struct test_hvc_info hvc_info[] = {
-       /* KVM_REG_ARM_STD_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
-
-       /* KVM_REG_ARM_STD_HYP_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
-
-       /* KVM_REG_ARM_VENDOR_HYP_BMAP */
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
-                       ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
-       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
-};
-
-/* Feed false hypercall info to test the KVM behavior */
-static const struct test_hvc_info false_hvc_info[] = {
-       /* Feature support check against a different family of hypercalls */
-       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
-       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
-       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
-};
-
-static void guest_test_hvc(const struct test_hvc_info *hc_info)
-{
-       unsigned int i;
-       struct arm_smccc_res res;
-       unsigned int hvc_info_arr_sz;
-
-       hvc_info_arr_sz = hc_info == hvc_info ?
-                         ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
-
-       for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
-               memset(&res, 0, sizeof(res));
-               smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
-
-               switch (stage) {
-               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
-                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
-                       break;
-               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
-                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
-                       break;
-               default:
-                       GUEST_FAIL("Unexpected stage = %u", stage);
-               }
-       }
-}
-
-static void guest_code(void)
-{
-       while (stage != TEST_STAGE_END) {
-               switch (stage) {
-               case TEST_STAGE_REG_IFACE:
-                       break;
-               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       guest_test_hvc(hvc_info);
-                       break;
-               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       guest_test_hvc(false_hvc_info);
-                       break;
-               default:
-                       GUEST_FAIL("Unexpected stage = %u", stage);
-               }
-
-               GUEST_SYNC(stage);
-       }
-
-       GUEST_DONE();
-}
-
-struct st_time {
-       uint32_t rev;
-       uint32_t attr;
-       uint64_t st_time;
-};
-
-#define STEAL_TIME_SIZE                ((sizeof(struct st_time) + 63) & ~63)
-#define ST_GPA_BASE            (1 << 30)
-
-static void steal_time_init(struct kvm_vcpu *vcpu)
-{
-       uint64_t st_ipa = (ulong)ST_GPA_BASE;
-       unsigned int gpages;
-
-       gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
-       vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
-
-       vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
-                            KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
-}
-
-static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
-{
-       uint64_t val;
-       unsigned int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
-               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
-               /* First 'read' should be an upper limit of the features supported */
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
-                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
-                       reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
-
-               /* Test a 'write' by disabling all the features of the register map */
-               ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
-               TEST_ASSERT(ret == 0,
-                       "Failed to clear all the features of reg: 0x%lx; ret: %d",
-                       reg_info->reg, errno);
-
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
-
-               /*
-                * Test enabling a feature that's not supported.
-                * Avoid this check if all the bits are occupied.
-                */
-               if (reg_info->max_feat_bit < 63) {
-                       ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
-                       errno, reg_info->reg);
-               }
-       }
-}
-
-static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
-{
-       uint64_t val;
-       unsigned int i;
-       int ret;
-
-       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
-               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
-
-               /*
-                * Before starting the VM, the test clears all the bits.
-                * Check if that's still the case.
-                */
-               val = vcpu_get_reg(vcpu, reg_info->reg);
-               TEST_ASSERT(val == 0,
-                       "Expected all the features to be cleared for reg: 0x%lx",
-                       reg_info->reg);
-
-               /*
-                * Since the VM has run at least once, KVM shouldn't allow modification of
-                * the registers and should return EBUSY. Set the registers and check for
-                * the expected errno.
-                */
-               ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
-               TEST_ASSERT(ret != 0 && errno == EBUSY,
-               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
-               errno, reg_info->reg);
-       }
-}
-
-static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
-       steal_time_init(*vcpu);
-
-       return vm;
-}
-
-static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
-{
-       int prev_stage = stage;
-
-       pr_debug("Stage: %d\n", prev_stage);
-
-       /* Sync the stage early, the VM might be freed below. */
-       stage++;
-       sync_global_to_guest(*vm, stage);
-
-       switch (prev_stage) {
-       case TEST_STAGE_REG_IFACE:
-               test_fw_regs_after_vm_start(*vcpu);
-               break;
-       case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
-               /* Start a new VM so that all the features are now enabled by default */
-               kvm_vm_free(*vm);
-               *vm = test_vm_create(vcpu);
-               break;
-       case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-       case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-               break;
-       default:
-               TEST_FAIL("Unknown test stage: %d", prev_stage);
-       }
-}
-
-static void test_run(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       bool guest_done = false;
-
-       vm = test_vm_create(&vcpu);
-
-       test_fw_regs_before_vm_start(vcpu);
-
-       while (!guest_done) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       test_guest_stage(&vm, &vcpu);
-                       break;
-               case UCALL_DONE:
-                       guest_done = true;
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unexpected guest exit");
-               }
-       }
-
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       test_run();
-       return 0;
-}
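The vcpu_get_reg() and __vcpu_set_reg() helpers used above wrap KVM's ONE_REG interface. A minimal sketch of the equivalent raw ioctls on one of the bitmapped firmware registers, assuming vcpu_fd is an already-open vCPU file descriptor (error handling omitted):

#include <linux/kvm.h>
#include <asm/kvm.h>
#include <sys/ioctl.h>

static void std_bmap_read_then_clear(int vcpu_fd)
{
        __u64 val = 0;
        struct kvm_one_reg reg = {
                .id   = KVM_REG_ARM_STD_BMAP,
                .addr = (__u64)&val,
        };

        /* Read the feature bitmap; on return val is the advertised upper limit. */
        ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);

        /* Writing 0 disables every feature; only legal before the VM has run. */
        val = 0;
        ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}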
diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c
deleted file mode 100644 (file)
index 8b7a80a..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR      0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
-       u64 esr = read_sysreg(esr_el1);
-
-       GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
-       GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
-       GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
-       GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
-                                                 handler_fn dabt_handler)
-{
-       struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(*vcpu);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
-       virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
-       return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events = {};
-
-       events.exception.ext_dabt_pending = true;
-       vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
-       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
-       asm volatile("test_mmio_abort_insn:\n\t"
-                    "ldr x0, [%0]\n\t"
-                    : : "r" (MMIO_ADDR) : "x0", "memory");
-
-       GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
-                                                       expect_sea_handler);
-       struct kvm_run *run = vcpu->run;
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
-       TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
-       TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
-       TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
-       vcpu_inject_extabt(vcpu);
-       vcpu_run_expect_done(vcpu);
-       kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
-       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
-       asm volatile("test_mmio_nisv_insn:\n\t"
-                    "ldr x0, [%0], #8\n\t"
-                    : : "r" (MMIO_ADDR) : "x0", "memory");
-
-       GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
-                                                       unexpected_dabt_handler);
-
-       TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
-       TEST_ASSERT_EQ(errno, ENOSYS);
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
-                                                       expect_sea_handler);
-       struct kvm_run *run = vcpu->run;
-
-       vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
-       TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
-       vcpu_inject_extabt(vcpu);
-       vcpu_run_expect_done(vcpu);
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       test_mmio_abort();
-       test_mmio_nisv();
-       test_mmio_nisv_abort();
-}
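For contrast with the abort injection above, completing a KVM_EXIT_MMIO read the normal way means filling run->mmio.data before re-entering the guest, which is exactly what this test avoids so the load never retires. A minimal sketch, assuming vcpu_fd and the mmap'd kvm_run structure are already set up:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static void complete_mmio_read(int vcpu_fd, struct kvm_run *run, __u64 value)
{
        /* KVM consumes run->mmio.data on the next KVM_RUN and retires the load. */
        if (run->exit_reason == KVM_EXIT_MMIO && !run->mmio.is_write)
                memcpy(run->mmio.data, &value, run->mmio.len);

        ioctl(vcpu_fd, KVM_RUN, 0);
}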
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
deleted file mode 100644 (file)
index ebd7043..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring a GICv3 for the guest
-// results in all of the ICC_* sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r)                                     \
-       ({                                                      \
-               uint64_t val;                                   \
-                                                               \
-               handled = false;                                \
-               dsb(sy);                                        \
-               val = read_sysreg_s(SYS_ ## r);                 \
-               val;                                            \
-       })
-
-#define __check_sr_write(r)                                    \
-       do {                                                    \
-               handled = false;                                \
-               dsb(sy);                                        \
-               write_sysreg_s(0, SYS_ ## r);                   \
-               isb();                                          \
-       } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r)                                       \
-       do {                                                    \
-               __check_sr_read(r);                             \
-               __GUEST_ASSERT(handled, #r " no read trap");    \
-       } while(0)
-
-#define check_sr_write(r)                                      \
-       do {                                                    \
-               __check_sr_write(r);                            \
-               __GUEST_ASSERT(handled, #r " no write trap");   \
-       } while(0)
-
-#define check_sr_rw(r)                         \
-       do {                                    \
-               check_sr_read(r);               \
-               check_sr_write(r);              \
-       } while(0)
-
-static void guest_code(void)
-{
-       uint64_t val;
-
-       /*
-        * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
-        * hidden the feature at runtime without any other userspace action.
-        */
-       __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
-                                read_sysreg(id_aa64pfr0_el1)) == 0,
-                      "GICv3 wrongly advertised");
-
-       /*
-        * Access all GICv3 registers, and fail if we don't get an UNDEF.
-        * Note that we happily access all the APxRn registers without
- * checking their existence, as all we want to see is a failure.
-        */
-       check_sr_rw(ICC_PMR_EL1);
-       check_sr_read(ICC_IAR0_EL1);
-       check_sr_write(ICC_EOIR0_EL1);
-       check_sr_rw(ICC_HPPIR0_EL1);
-       check_sr_rw(ICC_BPR0_EL1);
-       check_sr_rw(ICC_AP0R0_EL1);
-       check_sr_rw(ICC_AP0R1_EL1);
-       check_sr_rw(ICC_AP0R2_EL1);
-       check_sr_rw(ICC_AP0R3_EL1);
-       check_sr_rw(ICC_AP1R0_EL1);
-       check_sr_rw(ICC_AP1R1_EL1);
-       check_sr_rw(ICC_AP1R2_EL1);
-       check_sr_rw(ICC_AP1R3_EL1);
-       check_sr_write(ICC_DIR_EL1);
-       check_sr_read(ICC_RPR_EL1);
-       check_sr_write(ICC_SGI1R_EL1);
-       check_sr_write(ICC_ASGI1R_EL1);
-       check_sr_write(ICC_SGI0R_EL1);
-       check_sr_read(ICC_IAR1_EL1);
-       check_sr_write(ICC_EOIR1_EL1);
-       check_sr_rw(ICC_HPPIR1_EL1);
-       check_sr_rw(ICC_BPR1_EL1);
-       check_sr_rw(ICC_CTLR_EL1);
-       check_sr_rw(ICC_IGRPEN0_EL1);
-       check_sr_rw(ICC_IGRPEN1_EL1);
-
-       /*
-        * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
-        * be RAO/WI. Engage in non-fatal accesses, starting with a
-        * write of 0 to try and disable SRE, and let's see if it
-        * sticks.
-        */
-       __check_sr_write(ICC_SRE_EL1);
-       if (!handled)
-               GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
-       val = __check_sr_read(ICC_SRE_EL1);
-       if (!handled) {
-               __GUEST_ASSERT((val & BIT(0)),
-                              "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
-               GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
-       }
-
-       GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
-       /* Success, we've gracefully exploded! */
-       handled = true;
-       regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       do {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_PRINTF:
-                       printf("%s", uc.buffer);
-                       break;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Create a VM without a GICv3 */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
-       test_run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t pfr0;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-       __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
-                      "GICv3 not supported.");
-       kvm_vm_free(vm);
-
-       test_guest_no_gicv3();
-
-       return 0;
-}
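The setup this test deliberately omits is creating the in-kernel GICv3 through the KVM device API. A minimal sketch of that step with raw ioctls, assuming vm_fd is an already-open VM file descriptor; programming the distributor/redistributor base addresses via device attributes is left out:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_vgic_v3(int vm_fd)
{
        struct kvm_create_device dev = {
                .type = KVM_DEV_TYPE_ARM_VGIC_V3,
        };

        /* On success, dev.fd is the device fd used to set the GIC attributes. */
        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev))
                return -1;

        return dev.fd;
}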
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
deleted file mode 100644 (file)
index ec33a8f..0000000
+++ /dev/null
@@ -1,1135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * page_fault_test.c - Test stage 2 faults.
- *
- * This test tries different combinations of guest accesses (e.g., write,
- * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
- * hugetlbfs with a hole). It checks that the expected handling method is
- * called (e.g., uffd faults with the right address and write/read flag).
- */
-#include <linux/bitmap.h>
-#include <fcntl.h>
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-#include <asm/sysreg.h>
-#include <linux/bitfield.h>
-#include "guest_modes.h"
-#include "userfaultfd_util.h"
-
-/* Guest virtual addresses that point to the test page and its PTE. */
-#define TEST_GVA                               0xc0000000
-#define TEST_EXEC_GVA                          (TEST_GVA + 0x8)
-#define TEST_PTE_GVA                           0xb0000000
-#define TEST_DATA                              0x0123456789ABCDEF
-
-static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
-
-#define CMD_NONE                               (0)
-#define CMD_SKIP_TEST                          (1ULL << 1)
-#define CMD_HOLE_PT                            (1ULL << 2)
-#define CMD_HOLE_DATA                          (1ULL << 3)
-#define CMD_CHECK_WRITE_IN_DIRTY_LOG           (1ULL << 4)
-#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG                (1ULL << 5)
-#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG                (1ULL << 6)
-#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG     (1ULL << 7)
-#define CMD_SET_PTE_AF                         (1ULL << 8)
-
-#define PREPARE_FN_NR                          10
-#define CHECK_FN_NR                            10
-
-static struct event_cnt {
-       int mmio_exits;
-       int fail_vcpu_runs;
-       int uffd_faults;
-       /* uffd_faults is incremented from multiple threads. */
-       pthread_mutex_t uffd_faults_mutex;
-} events;
-
-struct test_desc {
-       const char *name;
-       uint64_t mem_mark_cmd;
-       /* Skip the test if any prepare function returns false */
-       bool (*guest_prepare[PREPARE_FN_NR])(void);
-       void (*guest_test)(void);
-       void (*guest_test_check[CHECK_FN_NR])(void);
-       uffd_handler_t uffd_pt_handler;
-       uffd_handler_t uffd_data_handler;
-       void (*dabt_handler)(struct ex_regs *regs);
-       void (*iabt_handler)(struct ex_regs *regs);
-       void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
-       void (*fail_vcpu_run_handler)(int ret);
-       uint32_t pt_memslot_flags;
-       uint32_t data_memslot_flags;
-       bool skip;
-       struct event_cnt expected_events;
-};
-
-struct test_params {
-       enum vm_mem_backing_src_type src_type;
-       struct test_desc *test_desc;
-};
-
-static inline void flush_tlb_page(uint64_t vaddr)
-{
-       uint64_t page = vaddr >> 12;
-
-       dsb(ishst);
-       asm volatile("tlbi vaae1is, %0" :: "r" (page));
-       dsb(ish);
-       isb();
-}
-
-static void guest_write64(void)
-{
-       uint64_t val;
-
-       WRITE_ONCE(*guest_test_memory, TEST_DATA);
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-/* Check the system for atomic instructions. */
-static bool guest_check_lse(void)
-{
-       uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
-       uint64_t atomic;
-
-       atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
-       return atomic >= 2;
-}
-
-static bool guest_check_dc_zva(void)
-{
-       uint64_t dczid = read_sysreg(dczid_el0);
-       uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
-
-       return dzp == 0;
-}
-
-/* Compare and swap instruction. */
-static void guest_cas(void)
-{
-       uint64_t val;
-
-       GUEST_ASSERT(guest_check_lse());
-       asm volatile(".arch_extension lse\n"
-                    "casal %0, %1, [%2]\n"
-                    :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, TEST_DATA);
-}
-
-static void guest_read64(void)
-{
-       uint64_t val;
-
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, 0);
-}
-
-/* Address translation instruction */
-static void guest_at(void)
-{
-       uint64_t par;
-
-       asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
-       isb();
-       par = read_sysreg(par_el1);
-
-       /* PAR_EL1.F (bit 0) is clear if the address translation succeeded */
-       GUEST_ASSERT_EQ(par & 1, 0);
-}
-
-/*
- * The size of the block written by "dc zva" is guaranteed to be between (2 <<
- * 0) and (2 << 9), which is safe in our case as we need the write to happen
- * for at least a word, and not more than a page.
- */
-static void guest_dc_zva(void)
-{
-       uint16_t val;
-
-       asm volatile("dc zva, %0" :: "r" (guest_test_memory));
-       dsb(ish);
-       val = READ_ONCE(*guest_test_memory);
-       GUEST_ASSERT_EQ(val, 0);
-}
-
-/*
- * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
- * That is special because KVM must take extra care with them: they
- * should still count as accesses for dirty logging or user-faulting, but
- * should be handled differently on mmio.
- */
-static void guest_ld_preidx(void)
-{
-       uint64_t val;
-       uint64_t addr = TEST_GVA - 8;
-
-       /*
-        * This ends up accessing "TEST_GVA - 8 + 8", where "TEST_GVA - 8" is
-        * in a gap between memslots not backed by anything.
-        */
-       asm volatile("ldr %0, [%1, #8]!"
-                    : "=r" (val), "+r" (addr));
-       GUEST_ASSERT_EQ(val, 0);
-       GUEST_ASSERT_EQ(addr, TEST_GVA);
-}
-
-static void guest_st_preidx(void)
-{
-       uint64_t val = TEST_DATA;
-       uint64_t addr = TEST_GVA - 8;
-
-       asm volatile("str %0, [%1, #8]!"
-                    : "+r" (val), "+r" (addr));
-
-       GUEST_ASSERT_EQ(addr, TEST_GVA);
-       val = READ_ONCE(*guest_test_memory);
-}
-
-static bool guest_set_ha(void)
-{
-       uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
-       uint64_t hadbs, tcr;
-
-       /* Skip if HA is not supported. */
-       hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
-       if (hadbs == 0)
-               return false;
-
-       tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
-       write_sysreg(tcr, tcr_el1);
-       isb();
-
-       return true;
-}
-
-static bool guest_clear_pte_af(void)
-{
-       *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
-       flush_tlb_page(TEST_GVA);
-
-       return true;
-}
-
-static void guest_check_pte_af(void)
-{
-       dsb(ish);
-       GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
-}
-
-static void guest_check_write_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_write_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
-}
-
-static void guest_check_s1ptw_wr_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
-}
-
-static void guest_check_no_s1ptw_wr_in_dirty_log(void)
-{
-       GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
-}
-
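-/* Call the code staged at TEST_EXEC_GVA and check its magic return value. */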
-static void guest_exec(void)
-{
-       int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
-       int ret;
-
-       ret = code();
-       GUEST_ASSERT_EQ(ret, 0x77);
-}
-
-static bool guest_prepare(struct test_desc *test)
-{
-       bool (*prepare_fn)(void);
-       int i;
-
-       for (i = 0; i < PREPARE_FN_NR; i++) {
-               prepare_fn = test->guest_prepare[i];
-               if (prepare_fn && !prepare_fn())
-                       return false;
-       }
-
-       return true;
-}
-
-static void guest_test_check(struct test_desc *test)
-{
-       void (*check_fn)(void);
-       int i;
-
-       for (i = 0; i < CHECK_FN_NR; i++) {
-               check_fn = test->guest_test_check[i];
-               if (check_fn)
-                       check_fn();
-       }
-}
-
-static void guest_code(struct test_desc *test)
-{
-       if (!guest_prepare(test))
-               GUEST_SYNC(CMD_SKIP_TEST);
-
-       GUEST_SYNC(test->mem_mark_cmd);
-
-       if (test->guest_test)
-               test->guest_test();
-
-       guest_test_check(test);
-       GUEST_DONE();
-}
-
-static void no_dabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
-}
-
-static void no_iabt_handler(struct ex_regs *regs)
-{
-       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
-}
-
-static struct uffd_args {
-       char *copy;
-       void *hva;
-       uint64_t paging_size;
-} pt_args, data_args;
-
-/* Returns 0 if the fault was handled, or a negative value if UFFDIO_COPY failed. */
-static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
-                               struct uffd_args *args)
-{
-       uint64_t addr = msg->arg.pagefault.address;
-       uint64_t flags = msg->arg.pagefault.flags;
-       struct uffdio_copy copy;
-       int ret;
-
-       TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
-                   "The only expected UFFD mode is MISSING");
-       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
-
-       pr_debug("uffd fault: addr=%p write=%d\n",
-                (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
-
-       copy.src = (uint64_t)args->copy;
-       copy.dst = addr;
-       copy.len = args->paging_size;
-       copy.mode = 0;
-
-       ret = ioctl(uffd, UFFDIO_COPY, &copy);
-       if (ret == -1) {
-               pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
-                       addr, errno);
-               return ret;
-       }
-
-       pthread_mutex_lock(&events.uffd_faults_mutex);
-       events.uffd_faults += 1;
-       pthread_mutex_unlock(&events.uffd_faults_mutex);
-       return 0;
-}
-
-static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       return uffd_generic_handler(mode, uffd, msg, &pt_args);
-}
-
-static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       return uffd_generic_handler(mode, uffd, msg, &data_args);
-}
-
-static void setup_uffd_args(struct userspace_mem_region *region,
-                           struct uffd_args *args)
-{
-       args->hva = (void *)region->region.userspace_addr;
-       args->paging_size = region->region.memory_size;
-
-       args->copy = malloc(args->paging_size);
-       TEST_ASSERT(args->copy, "Failed to allocate data copy.");
-       memcpy(args->copy, args->hva, args->paging_size);
-}
-
-static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
-                      struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
-{
-       struct test_desc *test = p->test_desc;
-       int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
-
-       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
-       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
-
-       *pt_uffd = NULL;
-       if (test->uffd_pt_handler)
-               *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
-                                                   pt_args.hva,
-                                                   pt_args.paging_size,
-                                                   1, test->uffd_pt_handler);
-
-       *data_uffd = NULL;
-       if (test->uffd_data_handler)
-               *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
-                                                     data_args.hva,
-                                                     data_args.paging_size,
-                                                     1, test->uffd_data_handler);
-}
-
-static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
-                     struct uffd_desc *data_uffd)
-{
-       if (test->uffd_pt_handler)
-               uffd_stop_demand_paging(pt_uffd);
-       if (test->uffd_data_handler)
-               uffd_stop_demand_paging(data_uffd);
-
-       free(pt_args.copy);
-       free(data_args.copy);
-}
-
-static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
-{
-       TEST_FAIL("No UFFD fault was expected.");
-       return -1;
-}
-
-/* Returns false if the test should be skipped. */
-static bool punch_hole_in_backing_store(struct kvm_vm *vm,
-                                       struct userspace_mem_region *region)
-{
-       void *hva = (void *)region->region.userspace_addr;
-       uint64_t paging_size = region->region.memory_size;
-       int ret, fd = region->fd;
-
-       if (fd != -1) {
-               ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                               0, paging_size);
-               TEST_ASSERT(ret == 0, "fallocate failed");
-       } else {
-               ret = madvise(hva, paging_size, MADV_DONTNEED);
-               TEST_ASSERT(ret == 0, "madvise failed");
-       }
-
-       return true;
-}
-
-static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
-       struct userspace_mem_region *region;
-       void *hva;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       hva = (void *)region->region.userspace_addr;
-
-       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
-
-       memcpy(hva, run->mmio.data, run->mmio.len);
-       events.mmio_exits += 1;
-}
-
-static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
-{
-       uint64_t data;
-
-       memcpy(&data, run->mmio.data, sizeof(data));
-       pr_debug("addr=%lld len=%d w=%d data=%lx\n",
-                run->mmio.phys_addr, run->mmio.len,
-                run->mmio.is_write, data);
-       TEST_FAIL("No MMIO exit was expected.");
-}
-
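-/*
- * Return whether page @host_pg_nr (in host page size units) of @region is
- * marked dirty in KVM's dirty log for that memslot.
- */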
-static bool check_write_in_dirty_log(struct kvm_vm *vm,
-                                    struct userspace_mem_region *region,
-                                    uint64_t host_pg_nr)
-{
-       unsigned long *bmap;
-       bool first_page_dirty;
-       uint64_t size = region->region.memory_size;
-
-       /* getpagesize() is not always equal to vm->page_size */
-       bmap = bitmap_zalloc(size / getpagesize());
-       kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
-       first_page_dirty = test_bit(host_pg_nr, bmap);
-       free(bmap);
-       return first_page_dirty;
-}
-
-/* Returns true to continue the test, and false if it should be skipped. */
-static bool handle_cmd(struct kvm_vm *vm, int cmd)
-{
-       struct userspace_mem_region *data_region, *pt_region;
-       bool continue_test = true;
-       uint64_t pte_gpa, pte_pg;
-
-       data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
-       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
-       pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
-
-       if (cmd == CMD_SKIP_TEST)
-               continue_test = false;
-
-       if (cmd & CMD_HOLE_PT)
-               continue_test = punch_hole_in_backing_store(vm, pt_region);
-       if (cmd & CMD_HOLE_DATA)
-               continue_test = punch_hole_in_backing_store(vm, data_region);
-       if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
-               TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
-                           "Missing write in dirty log");
-       if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
-               TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
-                           "Missing s1ptw write in dirty log");
-       if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
-               TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
-                           "Unexpected write in dirty log");
-       if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
-               TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
-                           "Unexpected s1ptw write in dirty log");
-
-       return continue_test;
-}
-
-void fail_vcpu_run_no_handler(int ret)
-{
-       TEST_FAIL("Unexpected vcpu run failure");
-}
-
-void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
-{
-       TEST_ASSERT(errno == ENOSYS,
-                   "The mmio handler should have returned not implemented.");
-       events.fail_vcpu_runs += 1;
-}
-
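-/*
- * Two-instruction blob ("mov x0, #0x77; ret") that load_exec_code_for_test()
- * copies into the test-data memslot for the guest_exec test.
- */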
-typedef uint32_t aarch64_insn_t;
-extern aarch64_insn_t __exec_test[2];
-
-noinline void __return_0x77(void)
-{
-       asm volatile("__exec_test: mov x0, #0x77\n"
-                    "ret\n");
-}
-
-/*
- * Note that this function runs on the host before the test VM starts: there's
- * no need to sync the D$ and I$ caches.
- */
-static void load_exec_code_for_test(struct kvm_vm *vm)
-{
-       uint64_t *code;
-       struct userspace_mem_region *region;
-       void *hva;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       hva = (void *)region->region.userspace_addr;
-
-       assert(TEST_EXEC_GVA > TEST_GVA);
-       code = hva + TEST_EXEC_GVA - TEST_GVA;
-       memcpy(code, __exec_test, sizeof(__exec_test));
-}
-
-static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-                                struct test_desc *test)
-{
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_DABT_CUR, no_dabt_handler);
-       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
-                               ESR_ELx_EC_IABT_CUR, no_iabt_handler);
-}
-
-static void setup_gva_maps(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *region;
-       uint64_t pte_gpa;
-
-       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-       /* Map TEST_GVA first. This will install a new PTE. */
-       virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
-       /* Then map TEST_PTE_GVA to the above PTE. */
-       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
-       virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
-}
-
-enum pf_test_memslots {
-       CODE_AND_DATA_MEMSLOT,
-       PAGE_TABLE_MEMSLOT,
-       TEST_DATA_MEMSLOT,
-};
-
-/*
- * Create a memslot for code and data at gpa 0, and test-data and PT ones
- * near max_gfn.
- */
-static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
-{
-       uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
-       uint64_t guest_page_size = vm->page_size;
-       uint64_t max_gfn = vm_compute_max_gfn(vm);
-       /* Enough for 2M of code when using 4K guest pages. */
-       uint64_t code_npages = 512;
-       uint64_t pt_size, data_size, data_gpa;
-
-       /*
-        * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
-        * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MB.  That's 13
-        * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
-        * twice that just in case.
-        */
-       pt_size = 26 * guest_page_size;
-
-       /* Memslot sizes and GPAs must be aligned to the backing page size */
-       pt_size = align_up(pt_size, backing_src_pagesz);
-       data_size = align_up(guest_page_size, backing_src_pagesz);
-       data_gpa = (max_gfn * guest_page_size) - data_size;
-       data_gpa = align_down(data_gpa, backing_src_pagesz);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
-                                   CODE_AND_DATA_MEMSLOT, code_npages, 0);
-       vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
-       vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
-
-       vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
-                                   PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
-                                   p->test_desc->pt_memslot_flags);
-       vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
-
-       vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
-                                   data_size / guest_page_size,
-                                   p->test_desc->data_memslot_flags);
-       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void setup_ucall(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-
-       ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
-}
-
-static void setup_default_handlers(struct test_desc *test)
-{
-       if (!test->mmio_handler)
-               test->mmio_handler = mmio_no_handler;
-
-       if (!test->fail_vcpu_run_handler)
-               test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
-}
-
-static void check_event_counts(struct test_desc *test)
-{
-       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
-       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
-       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
-}
-
-static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
-{
-       struct test_desc *test = p->test_desc;
-
-       pr_debug("Test: %s\n", test->name);
-       pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-       pr_debug("Testing memory backing src type: %s\n",
-                vm_mem_backing_src_alias(p->src_type)->name);
-}
-
-static void reset_event_counts(void)
-{
-       memset(&events, 0, sizeof(events));
-}
-
-/*
- * This function either succeeds, skips the test (after setting test->skip), or
- * fails with a TEST_FAIL that aborts all tests.
- */
-static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-                         struct test_desc *test)
-{
-       struct kvm_run *run;
-       struct ucall uc;
-       int ret;
-
-       run = vcpu->run;
-
-       for (;;) {
-               ret = _vcpu_run(vcpu);
-               if (ret) {
-                       test->fail_vcpu_run_handler(ret);
-                       goto done;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       if (!handle_cmd(vm, uc.args[1])) {
-                               test->skip = true;
-                               goto done;
-                       }
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_NONE:
-                       if (run->exit_reason == KVM_EXIT_MMIO)
-                               test->mmio_handler(vm, run);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
-}
-
-static void run_test(enum vm_guest_mode mode, void *arg)
-{
-       struct test_params *p = (struct test_params *)arg;
-       struct test_desc *test = p->test_desc;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       struct uffd_desc *pt_uffd, *data_uffd;
-
-       print_test_banner(mode, p);
-
-       vm = ____vm_create(VM_SHAPE(mode));
-       setup_memslots(vm, p);
-       kvm_vm_elf_load(vm, program_invocation_name);
-       setup_ucall(vm);
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-
-       setup_gva_maps(vm);
-
-       reset_event_counts();
-
-       /*
-        * Set some code in the data memslot for the guest to execute (only
-        * applicable to the EXEC tests). This has to be done before
-        * setup_uffd() as that function copies the memslot data for the uffd
-        * handler.
-        */
-       load_exec_code_for_test(vm);
-       setup_uffd(vm, p, &pt_uffd, &data_uffd);
-       setup_abort_handlers(vm, vcpu, test);
-       setup_default_handlers(test);
-       vcpu_args_set(vcpu, 1, test);
-
-       vcpu_run_loop(vm, vcpu, test);
-
-       kvm_vm_free(vm);
-       free_uffd(test, pt_uffd, data_uffd);
-
-       /*
-        * Make sure we check the events after the uffd threads have exited,
-        * which means they updated their respective event counters.
-        */
-       if (!test->skip)
-               check_event_counts(test);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-s mem-type]\n", name);
-       puts("");
-       guest_modes_help();
-       backing_src_help("-s");
-       puts("");
-}
-
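-/* Helpers to build test name strings by pasting tokens together with '_'. */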
-#define SNAME(s)                       #s
-#define SCAT2(a, b)                    SNAME(a ## _ ## b)
-#define SCAT3(a, b, c)                 SCAT2(a, SCAT2(b, c))
-#define SCAT4(a, b, c, d)              SCAT2(a, SCAT3(b, c, d))
-
-#define _CHECK(_test)                  _CHECK_##_test
-#define _PREPARE(_test)                        _PREPARE_##_test
-#define _PREPARE_guest_read64          NULL
-#define _PREPARE_guest_ld_preidx       NULL
-#define _PREPARE_guest_write64         NULL
-#define _PREPARE_guest_st_preidx       NULL
-#define _PREPARE_guest_exec            NULL
-#define _PREPARE_guest_at              NULL
-#define _PREPARE_guest_dc_zva          guest_check_dc_zva
-#define _PREPARE_guest_cas             guest_check_lse
-
-/* With or without access flag checks */
-#define _PREPARE_with_af               guest_set_ha, guest_clear_pte_af
-#define _PREPARE_no_af                 NULL
-#define _CHECK_with_af                 guest_check_pte_af
-#define _CHECK_no_af                   NULL
-
-/* Performs an access and checks that no faults were triggered. */
-#define TEST_ACCESS(_access, _with_af, _mark_cmd)                              \
-{                                                                              \
-       .name                   = SCAT3(_access, _with_af, #_mark_cmd),         \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .mem_mark_cmd           = _mark_cmd,                                    \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _CHECK(_with_af) },                         \
-       .expected_events        = { 0 },                                        \
-}
-
-#define TEST_UFFD(_access, _with_af, _mark_cmd,                                        \
-                 _uffd_data_handler, _uffd_pt_handler, _uffd_faults)           \
-{                                                                              \
-       .name                   = SCAT4(uffd, _access, _with_af, #_mark_cmd),   \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mem_mark_cmd           = _mark_cmd,                                    \
-       .guest_test_check       = { _CHECK(_with_af) },                         \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = _uffd_pt_handler,                             \
-       .expected_events        = { .uffd_faults = _uffd_faults, },             \
-}
-
-#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check)              \
-{                                                                              \
-       .name                   = SCAT3(dirty_log, _access, _with_af),          \
-       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
-       .expected_events        = { 0 },                                        \
-}
-
-#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,         \
-                               _uffd_faults, _test_check, _pt_check)           \
-{                                                                              \
-       .name                   = SCAT3(uffd_and_dirty_log, _access, _with_af), \
-       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
-       .guest_prepare          = { _PREPARE(_with_af),                         \
-                                   _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                              \
-       .expected_events        = { .uffd_faults = _uffd_faults, },             \
-}
-
-#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)                   \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot, _access),                   \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits },                \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME(_access)                                   \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1 },                      \
-}
-
-#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,     \
-                                     _test_check)                              \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot, _access),                   \
-       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _test_check },                              \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits},                 \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check)                \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syn_and_dlog, _access),   \
-       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .guest_test_check       = { _test_check },                              \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1 },                      \
-}
-
-#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits,          \
-                                _uffd_data_handler, _uffd_faults)              \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_uffd, _access),              \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                              \
-       .mmio_handler           = _mmio_handler,                                \
-       .expected_events        = { .mmio_exits = _mmio_exits,                  \
-                                   .uffd_faults = _uffd_faults },              \
-}
-
-#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,      \
-                                            _uffd_faults)                      \
-{                                                                              \
-       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
-       .data_memslot_flags     = KVM_MEM_READONLY,                             \
-       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
-       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
-       .guest_prepare          = { _PREPARE(_access) },                        \
-       .guest_test             = _access,                                      \
-       .uffd_data_handler      = _uffd_data_handler,                           \
-       .uffd_pt_handler        = uffd_pt_handler,                      \
-       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
-       .expected_events        = { .fail_vcpu_runs = 1,                        \
-                                   .uffd_faults = _uffd_faults },              \
-}
-
-static struct test_desc tests[] = {
-
-       /* Check that HW is setting the Access Flag (AF) (sanity checks). */
-       TEST_ACCESS(guest_read64, with_af, CMD_NONE),
-       TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
-       TEST_ACCESS(guest_cas, with_af, CMD_NONE),
-       TEST_ACCESS(guest_write64, with_af, CMD_NONE),
-       TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
-       TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
-       TEST_ACCESS(guest_exec, with_af, CMD_NONE),
-
-       /*
-        * Punch a hole in the data backing store, and then try multiple
-        * accesses: reads should return zeroes, and writes should
-        * re-populate the page. Moreover, the test also checks that no
-        * exception was generated in the guest.  Note that this
-        * reading/writing behavior is the same as reading/writing a
-        * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
-        * userspace.
-        */
-       TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
-       TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
-
-       /*
-        * Punch holes in the data and PT backing stores and mark them for
-        * userfaultfd handling. This should result in 2 faults: the access
-        * on the data backing store, and its respective S1 page table walk
-        * (S1PTW).
-        */
-       TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       /*
-        * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
-        * The S1PTW fault should still be marked as a write.
-        */
-       TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_no_handler, uffd_pt_handler, 1),
-       TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-       TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
-                 uffd_data_handler, uffd_pt_handler, 2),
-
-       /*
-        * Try accesses when the data and PT memory regions are both
-        * tracked for dirty logging.
-        */
-       TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
-                      guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_ld_preidx, with_af,
-                      guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
-                      guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-       TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
-                      guest_check_s1ptw_wr_in_dirty_log),
-
-       /*
-        * Access when the data and PT memory regions are both marked for
-        * dirty logging and UFFD at the same time. The expected result is
-        * that writes should mark the dirty log and trigger a userfaultfd
-        * write fault.  Reads/execs should result in a read userfaultfd
-        * fault, and nothing in the dirty log.  Any S1PTW should result in
-        * a write in the dirty log and a userfaultfd write.
-        */
-       TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_no_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
-                               uffd_data_handler,
-                               2, guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_no_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
-                               uffd_data_handler,
-                               2, guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
-                               uffd_data_handler,
-                               2, guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
-                               uffd_data_handler, 2,
-                               guest_check_write_in_dirty_log,
-                               guest_check_s1ptw_wr_in_dirty_log),
-       /*
-        * Access when both the PT and data regions are marked read-only
-        * (with KVM_MEM_READONLY). Writes with a syndrome result in an
-        * MMIO exit, writes with no syndrome (e.g., CAS) result in a
-        * failed vcpu run, and reads/execs with and without syndromes do
-        * not fault.
-        */
-       TEST_RO_MEMSLOT(guest_read64, 0, 0),
-       TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
-       TEST_RO_MEMSLOT(guest_at, 0, 0),
-       TEST_RO_MEMSLOT(guest_exec, 0, 0),
-       TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
-       TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
-
-       /*
-        * The PT and data regions are both read-only and marked
-        * for dirty logging at the same time. The expected result is that
-        * for writes there should be no write in the dirty log. The
-        * readonly handling is the same as if the memslot was not marked
-        * for dirty logging: writes with a syndrome result in an MMIO
-        * exit, and writes with no syndrome result in a failed vcpu run.
-        */
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
-                                     guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
-                                     1, guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
-                                                 guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
-                                                 guest_check_no_write_in_dirty_log),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
-                                                 guest_check_no_write_in_dirty_log),
-
-       /*
-        * The PT and data regions are both read-only and punched with
-        * holes tracked with userfaultfd.  The expected result is the
-        * union of both userfaultfd and read-only behaviors. For example,
-        * write accesses result in a userfaultfd write fault and an MMIO
-        * exit.  Writes with no syndrome result in a failed vcpu run and
-        * no userfaultfd write fault. Reads result in userfaultfd getting
-        * triggered.
-        */
-       TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
-                                uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
-       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
-
-       { 0 }
-};
-
-static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
-{
-       struct test_desc *t;
-
-       for (t = &tests[0]; t->name; t++) {
-               if (t->skip)
-                       continue;
-
-               struct test_params p = {
-                       .src_type = src_type,
-                       .test_desc = t,
-               };
-
-               for_each_guest_mode(run_test, &p);
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       enum vm_mem_backing_src_type src_type;
-       int opt;
-
-       src_type = DEFAULT_VM_MEM_SRC;
-
-       while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
-               switch (opt) {
-               case 'm':
-                       guest_modes_cmdline(optarg);
-                       break;
-               case 's':
-                       src_type = parse_backing_src_type(optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       exit(0);
-               }
-       }
-
-       for_each_test_and_guest_mode(src_type);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
deleted file mode 100644 (file)
index ab491ee..0000000
+++ /dev/null
@@ -1,290 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * psci_test - Tests relating to KVM's PSCI implementation.
- *
- * Copyright (c) 2021 Google LLC.
- *
- * This test includes:
- *  - A regression test for a race between KVM servicing the PSCI CPU_ON call
- *    and userspace reading the targeted vCPU's registers.
- *  - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
- *    KVM_SYSTEM_EVENT_SUSPEND UAPI.
- */
-
-#include <linux/kernel.h>
-#include <linux/psci.h>
-#include <asm/cputype.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
-#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
-                           uint64_t context_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
-                 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_affinity_info(uint64_t target_affinity,
-                                  uint64_t lowest_affinity_level)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
-                 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
-                 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
-static uint64_t psci_features(uint32_t func_id)
-{
-       struct arm_smccc_res res;
-
-       smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
-
-       return res.a0;
-}
-
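-/* Power off @vcpu by setting its MP state to KVM_MP_STATE_STOPPED. */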
-static void vcpu_power_off(struct kvm_vcpu *vcpu)
-{
-       struct kvm_mp_state mp_state = {
-               .mp_state = KVM_MP_STATE_STOPPED,
-       };
-
-       vcpu_mp_state_set(vcpu, &mp_state);
-}
-
-static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
-                              struct kvm_vcpu **target)
-{
-       struct kvm_vcpu_init init;
-       struct kvm_vm *vm;
-
-       vm = vm_create(2);
-
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-       *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
-       *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
-
-       return vm;
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
-               REPORT_GUEST_ASSERT(uc);
-}
-
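-/*
- * Check that the target vCPU came online at CPU_ON_ENTRY_ADDR with
- * CPU_ON_CONTEXT_ID in x0, as requested via PSCI CPU_ON.
- */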
-static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
-{
-       uint64_t obs_pc, obs_x0;
-
-       obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-       obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
-
-       TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
-                   "unexpected target cpu pc: %lx (expected: %lx)",
-                   obs_pc, CPU_ON_ENTRY_ADDR);
-       TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
-                   "unexpected target context id: %lx (expected: %lx)",
-                   obs_x0, CPU_ON_CONTEXT_ID);
-}
-
-static void guest_test_cpu_on(uint64_t target_cpu)
-{
-       uint64_t target_state;
-
-       GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-
-       do {
-               target_state = psci_affinity_info(target_cpu, 0);
-
-               GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
-                            (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
-       } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
-       GUEST_DONE();
-}
-
-static void host_test_cpu_on(void)
-{
-       struct kvm_vcpu *source, *target;
-       uint64_t target_mpidr;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = setup_vm(guest_test_cpu_on, &source, &target);
-
-       /* Make sure the target is already off when executing the test. */
-       vcpu_power_off(target);
-
-       target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
-       vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
-       enter_guest(source);
-
-       if (get_ucall(source, &uc) != UCALL_DONE)
-               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-
-       assert_vcpu_reset(target);
-       kvm_vm_free(vm);
-}
-
-static void guest_test_system_suspend(void)
-{
-       uint64_t ret;
-
-       /* assert that SYSTEM_SUSPEND is discoverable */
-       GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
-       GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
-
-       ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
-       GUEST_SYNC(ret);
-}
-
-static void host_test_system_suspend(void)
-{
-       struct kvm_vcpu *source, *target;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-
-       vm = setup_vm(guest_test_system_suspend, &source, &target);
-       vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
-
-       vcpu_power_off(target);
-       run = source->run;
-
-       enter_guest(source);
-
-       TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
-       TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
-                   "Unhandled system event: %u (expected: %u)",
-                   run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
-
-       kvm_vm_free(vm);
-}
-
-static void guest_test_system_off2(void)
-{
-       uint64_t ret;
-
-       /* assert that SYSTEM_OFF2 is discoverable */
-       GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
-                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
-       GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
-                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
-
-       /* With non-zero 'cookie' field, it should fail */
-       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
-       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
-       /*
-        * This would normally never return, so KVM sets the return value
-        * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
-        * that it can test both values for HIBERNATE_OFF.
-        */
-       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
-       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
-       /*
-        * Revision F.b of the PSCI v1.3 specification documents zero as an
-        * alias for HIBERNATE_OFF, since that's the value used in earlier
-        * revisions of the spec and some implementations in the field.
-        */
-       ret = psci_system_off2(0, 1);
-       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
-
-       ret = psci_system_off2(0, 0);
-       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
-
-       GUEST_DONE();
-}
-
-static void host_test_system_off2(void)
-{
-       struct kvm_vcpu *source, *target;
-       struct kvm_mp_state mps;
-       uint64_t psci_version = 0;
-       int nr_shutdowns = 0;
-       struct kvm_run *run;
-       struct ucall uc;
-
-       setup_vm(guest_test_system_off2, &source, &target);
-
-       psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
-
-       TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
-                   "Unexpected PSCI version %lu.%lu",
-                   PSCI_VERSION_MAJOR(psci_version),
-                   PSCI_VERSION_MINOR(psci_version));
-
-       vcpu_power_off(target);
-       run = source->run;
-
-       enter_guest(source);
-       while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
-               TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
-                           "Unhandled system event: %u (expected: %u)",
-                           run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
-               TEST_ASSERT(run->system_event.ndata >= 1,
-                           "Unexpected amount of system event data: %u (expected >= 1)",
-                           run->system_event.ndata);
-               TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
-                           "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
-                           run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
-
-               nr_shutdowns++;
-
-               /* Restart the vCPU */
-               mps.mp_state = KVM_MP_STATE_RUNNABLE;
-               vcpu_mp_state_set(source, &mps);
-
-               enter_guest(source);
-       }
-
-       TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
-       TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
-
-       host_test_cpu_on();
-       host_test_system_suspend();
-       host_test_system_off2();
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
deleted file mode 100644 (file)
index bc6cf50..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * set_id_regs - Test for setting ID registers from userspace.
- *
- * Copyright (c) 2023 Google LLC.
- *
- * Test that KVM supports setting ID registers from userspace and handles the
- * feature set correctly.
- */
-
-#include <stdint.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-#include <linux/bitfield.h>
-
-enum ftr_type {
-       FTR_EXACT,                      /* Use a predefined safe value */
-       FTR_LOWER_SAFE,                 /* Smaller value is safe */
-       FTR_HIGHER_SAFE,                /* Bigger value is safe */
-       FTR_HIGHER_OR_ZERO_SAFE,        /* Bigger value is safe, but 0 is biggest */
-       FTR_END,                        /* Mark the last ftr bits */
-};
-
-#define FTR_SIGNED     true    /* Value should be treated as signed */
-#define FTR_UNSIGNED   false   /* Value should be treated as unsigned */
-
-struct reg_ftr_bits {
-       char *name;
-       bool sign;
-       enum ftr_type type;
-       uint8_t shift;
-       uint64_t mask;
-       /*
-        * For FTR_EXACT, safe_val is used as the exact safe value.
-        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
-        */
-       int64_t safe_val;
-};
-
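-/* Pairs an ID register with the description of its feature fields under test. */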
-struct test_feature_reg {
-       uint32_t reg;
-       const struct reg_ftr_bits *ftr_bits;
-};
-
-#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL)      \
-       {                                                               \
-               .name = #NAME,                                          \
-               .sign = SIGNED,                                         \
-               .type = TYPE,                                           \
-               .shift = SHIFT,                                         \
-               .mask = MASK,                                           \
-               .safe_val = SAFE_VAL,                                   \
-       }
-
-#define REG_FTR_BITS(type, reg, field, safe_val) \
-       __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
-                      reg##_##field##_MASK, safe_val)
-
-#define S_REG_FTR_BITS(type, reg, field, safe_val) \
-       __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
-                      reg##_##field##_MASK, safe_val)
-
-#define REG_FTR_END                                    \
-       {                                               \
-               .type = FTR_END,                        \
-       }
-
-static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
-       REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
-       REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
-       REG_FTR_END,
-};
-
-static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
-       REG_FTR_END,
-};
-
-#define TEST_REG(id, table)                    \
-       {                                       \
-               .reg = id,                      \
-               .ftr_bits = &((table)[0]),      \
-       }
-
-static struct test_feature_reg test_regs[] = {
-       TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
-       TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
-       TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
-       TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
-       TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
-       TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
-       TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
-       TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
-       TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
-       TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
-       TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
-};
-
-#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
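Each GUEST_REG_SYNC() in guest_code() below reports a register's encoding and
its guest-visible value back to the host as a ucall, so test_guest_reg_read()
can compare it against what userspace wrote. Roughly, a single invocation is
just the macro substitution (GUEST_SYNC_ARGS and read_sysreg_s come from the
selftests headers):

        /* e.g. GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1) expands to: */
        GUEST_SYNC_ARGS(0, SYS_ID_AA64DFR0_EL1, read_sysreg_s(SYS_ID_AA64DFR0_EL1), 0, 0);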
-
-static void guest_code(void)
-{
-       GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
-       GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
-       GUEST_REG_SYNC(SYS_CTR_EL0);
-
-       GUEST_DONE();
-}
-
-/* Return a safe value for a given ftr_bits and ftr value */
-uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
-       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
-       if (ftr_bits->sign == FTR_UNSIGNED) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = ftr_bits->safe_val;
-                       break;
-               case FTR_LOWER_SAFE:
-                       if (ftr > ftr_bits->safe_val)
-                               ftr--;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       if (ftr < ftr_max)
-                               ftr++;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == ftr_max)
-                               ftr = 0;
-                       else if (ftr != 0)
-                               ftr++;
-                       break;
-               default:
-                       break;
-               }
-       } else if (ftr != ftr_max) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = ftr_bits->safe_val;
-                       break;
-               case FTR_LOWER_SAFE:
-                       if (ftr > ftr_bits->safe_val)
-                               ftr--;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       if (ftr < ftr_max - 1)
-                               ftr++;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr != 0 && ftr != ftr_max - 1)
-                               ftr++;
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       return ftr;
-}
-
-/* Return an invalid value for a given ftr_bits and ftr value */
-uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
-{
-       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
-
-       if (ftr_bits->sign == FTR_UNSIGNED) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
-                       break;
-               case FTR_LOWER_SAFE:
-                       ftr++;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       ftr--;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == 0)
-                               ftr = ftr_max;
-                       else
-                               ftr--;
-                       break;
-               default:
-                       break;
-               }
-       } else if (ftr != ftr_max) {
-               switch (ftr_bits->type) {
-               case FTR_EXACT:
-                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
-                       break;
-               case FTR_LOWER_SAFE:
-                       ftr++;
-                       break;
-               case FTR_HIGHER_SAFE:
-                       ftr--;
-                       break;
-               case FTR_HIGHER_OR_ZERO_SAFE:
-                       if (ftr == 0)
-                               ftr = ftr_max - 1;
-                       else
-                               ftr--;
-                       break;
-               default:
-                       break;
-               }
-       } else {
-               ftr = 0;
-       }
-
-       return ftr;
-}
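As a quick illustration of how the two helpers above behave for the common
FTR_LOWER_SAFE case (a sketch, not part of the test): for an unsigned field
whose current value is 2 and whose minimal safe value is 0, lowering the
field is considered safe, while raising it past the host value must be
rejected.

        const struct reg_ftr_bits example = {
                .sign = FTR_UNSIGNED,
                .type = FTR_LOWER_SAFE,
                .safe_val = 0,
        };

        uint64_t safe = get_safe_value(&example, 2);    /* 1: one below the host value */
        uint64_t bad  = get_invalid_value(&example, 2); /* 3: one above the host value */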
-
-static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
-                                    const struct reg_ftr_bits *ftr_bits)
-{
-       uint8_t shift = ftr_bits->shift;
-       uint64_t mask = ftr_bits->mask;
-       uint64_t val, new_val, ftr;
-
-       val = vcpu_get_reg(vcpu, reg);
-       ftr = (val & mask) >> shift;
-
-       ftr = get_safe_value(ftr_bits, ftr);
-
-       ftr <<= shift;
-       val &= ~mask;
-       val |= ftr;
-
-       vcpu_set_reg(vcpu, reg, val);
-       new_val = vcpu_get_reg(vcpu, reg);
-       TEST_ASSERT_EQ(new_val, val);
-
-       return new_val;
-}
-
-static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
-                             const struct reg_ftr_bits *ftr_bits)
-{
-       uint8_t shift = ftr_bits->shift;
-       uint64_t mask = ftr_bits->mask;
-       uint64_t val, old_val, ftr;
-       int r;
-
-       val = vcpu_get_reg(vcpu, reg);
-       ftr = (val & mask) >> shift;
-
-       ftr = get_invalid_value(ftr_bits, ftr);
-
-       old_val = val;
-       ftr <<= shift;
-       val &= ~mask;
-       val |= ftr;
-
-       r = __vcpu_set_reg(vcpu, reg, val);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
-
-       val = vcpu_get_reg(vcpu, reg);
-       TEST_ASSERT_EQ(val, old_val);
-}
-
-static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-
-#define encoding_to_range_idx(encoding)                                                        \
-       KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding),      \
-                                    sys_reg_CRn(encoding), sys_reg_CRm(encoding),      \
-                                    sys_reg_Op2(encoding))
-
-
-static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
-{
-       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-       struct reg_mask_range range = {
-               .addr = (__u64)masks,
-       };
-       int ret;
-
-       /* KVM should return error when reserved field is not zero */
-       range.reserved[0] = 1;
-       ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-       TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
-
-       /* Get writable masks for feature ID registers */
-       memset(range.reserved, 0, sizeof(range.reserved));
-       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
-       for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
-               const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
-               uint32_t reg_id = test_regs[i].reg;
-               uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
-               int idx;
-
-               /* Get the index to masks array for the idreg */
-               idx = encoding_to_range_idx(reg_id);
-
-               for (int j = 0;  ftr_bits[j].type != FTR_END; j++) {
-                       /* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
-                       if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
-                               ksft_test_result_skip("%s on AARCH64 only system\n",
-                                                     ftr_bits[j].name);
-                               continue;
-                       }
-
-                       /* Make sure the feature field is writable */
-                       TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
-
-                       test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
-
-                       test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
-                                                                 &ftr_bits[j]);
-
-                       ksft_test_result_pass("%s\n", ftr_bits[j].name);
-               }
-       }
-}
-
-#define MPAM_IDREG_TEST        6
-static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
-{
-       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
-       struct reg_mask_range range = {
-               .addr = (__u64)masks,
-       };
-       uint64_t val;
-       int idx, err;
-
-       /*
-        * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
-        * check that, if it can be set to 1 (i.e. it is supported by the
-        * hardware), it can't be set to other values.
-        */
-
-       /* Get writable masks for feature ID registers */
-       memset(range.reserved, 0, sizeof(range.reserved));
-       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
-
-       /* Writeable? Nothing to test! */
-       idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
-       if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
-               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
-               return;
-       }
-
-       /* Get the id register value */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-
-       /* Try to set MPAM=0. This should always be possible. */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
-
-       /* Try to set MPAM=1 */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
-
-       /* Try to set MPAM=2 */
-       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
-       if (err)
-               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
-       else
-               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
-
-       /* And again for ID_AA64PFR1_EL1.MPAM_frac */
-       idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
-       if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
-               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
-               return;
-       }
-
-       /* Get the id register value */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
-
-       /* Try to set MPAM_frac=0. This should always be possible. */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac=0 was not accepted\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=0 worked\n");
-
-       /* Try to set MPAM_frac=1 */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
-       else
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac=1 was writable\n");
-
-       /* Try to set MPAM_frac=2 */
-       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
-       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
-       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
-       if (err)
-               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
-       else
-               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
-}
-
-static void test_guest_reg_read(struct kvm_vcpu *vcpu)
-{
-       bool done = false;
-       struct ucall uc;
-
-       while (!done) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_SYNC:
-                       /* Make sure the written values are seen by guest */
-                       TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
-                                      uc.args[3]);
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       }
-}
-
-/* Politely lifted from arch/arm64/include/asm/cache.h */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level)      \
-       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
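A quick worked example of the decode (illustrative value, not taken from real
hardware): with CLIDR_EL1 == 0x23, the Ctype fields read as follows, and
test_clidr() below would insert its unified cache at level 3, the first empty
level.

        uint64_t clidr = 0x23;

        /* CLIDR_CTYPE(clidr, 1) == 0x3: separate I and D caches at level 1 */
        /* CLIDR_CTYPE(clidr, 2) == 0x4: unified cache at level 2 */
        /* CLIDR_CTYPE(clidr, 3) == 0x0: no cache at level 3 */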
-
-static void test_clidr(struct kvm_vcpu *vcpu)
-{
-       uint64_t clidr;
-       int level;
-
-       clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
-
-       /* find the first empty level in the cache hierarchy */
-       for (level = 1; level < 7; level++) {
-               if (!CLIDR_CTYPE(clidr, level))
-                       break;
-       }
-
-       /*
-        * If you have a mind-boggling 7 levels of cache, congratulations, you
-        * get to fix this.
-        */
-       TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
-
-       /* stick in a unified cache level */
-       clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
-       test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
-}
-
-static void test_ctr(struct kvm_vcpu *vcpu)
-{
-       u64 ctr;
-
-       ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
-       ctr &= ~CTR_EL0_DIC_MASK;
-       if (ctr & CTR_EL0_IminLine_MASK)
-               ctr--;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
-       test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
-}
-
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
-{
-       u64 val;
-
-       test_clidr(vcpu);
-       test_ctr(vcpu);
-
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
-       val++;
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
-
-       test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
-       ksft_test_result_pass("%s\n", __func__);
-}
-
-static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
-{
-       size_t idx = encoding_to_range_idx(encoding);
-       uint64_t observed;
-
-       observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
-       TEST_ASSERT_EQ(test_reg_vals[idx], observed);
-}
-
-static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
-        * architectural reset of the vCPU.
-        */
-       aarch64_vcpu_setup(vcpu, NULL);
-
-       for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
-               test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
-
-       test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
-       test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
-       test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
-
-       ksft_test_result_pass("%s\n", __func__);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       bool aarch64_only;
-       uint64_t val, el0;
-       int test_cnt;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Check for AARCH64 only system */
-       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
-       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-       aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
-
-       ksft_print_header();
-
-       test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
-                  ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
-                  ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
-                  ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
-                  MPAM_IDREG_TEST;
-
-       ksft_set_plan(test_cnt);
-
-       test_vm_ftr_id_regs(vcpu, aarch64_only);
-       test_vcpu_ftr_id_regs(vcpu);
-       test_user_set_mpam_reg(vcpu);
-
-       test_guest_reg_read(vcpu);
-
-       test_reset_preserves_id_regs(vcpu);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
deleted file mode 100644 (file)
index 2d189f3..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * smccc_filter - Tests for the SMCCC filter UAPI.
- *
- * Copyright (c) 2023 Google LLC
- *
- * This test includes:
- *  - Tests that the UAPI constraints are upheld by KVM. For example, userspace
- *    is prevented from filtering the architecture range of SMCCC calls.
- *  - Tests that the filter actions (DENIED, FWD_TO_USER) work as intended.
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/psci.h>
-#include <stdint.h>
-
-#include "processor.h"
-#include "test_util.h"
-
-enum smccc_conduit {
-       HVC_INSN,
-       SMC_INSN,
-};
-
-#define for_each_conduit(conduit)                                      \
-       for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
-
-static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
-{
-       struct arm_smccc_res res;
-
-       if (conduit == SMC_INSN)
-               smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
-       else
-               smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
-
-       GUEST_SYNC(res.a0);
-}
-
-static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
-                             enum kvm_smccc_filter_action action)
-{
-       struct kvm_smccc_filter filter = {
-               .base           = start,
-               .nr_functions   = nr_functions,
-               .action         = action,
-       };
-
-       return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
-                                    KVM_ARM_VM_SMCCC_FILTER, &filter);
-}
-
-static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
-                            enum kvm_smccc_filter_action action)
-{
-       int ret = __set_smccc_filter(vm, start, nr_functions, action);
-
-       TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
-}
-
-static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vcpu_init init;
-       struct kvm_vm *vm;
-
-       vm = vm_create(1);
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-
-       /*
-        * Enable in-kernel emulation of PSCI to ensure that calls are denied
-        * due to the SMCCC filter, not because of KVM.
-        */
-       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-       *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
-       return vm;
-}
-
-static void test_pad_must_be_zero(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       struct kvm_smccc_filter filter = {
-               .base           = PSCI_0_2_FN_PSCI_VERSION,
-               .nr_functions   = 1,
-               .action         = KVM_SMCCC_FILTER_DENY,
-               .pad            = { -1 },
-       };
-       int r;
-
-       r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
-                                 KVM_ARM_VM_SMCCC_FILTER, &filter);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Setting filter with nonzero padding should return EINVAL");
-}
-
-/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
-static void test_filter_reserved_range(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       uint32_t smc64_fn;
-       int r;
-
-       r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
-                              1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter reserved range should return EEXIST");
-
-       smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
-                                     0, 0);
-
-       r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter reserved range should return EEXIST");
-
-       kvm_vm_free(vm);
-}
-
-static void test_invalid_nr_functions(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to filter 0 functions should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-static void test_overflow_nr_functions(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to overflow filter range should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-static void test_reserved_action(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
-       TEST_ASSERT(r < 0 && errno == EINVAL,
-                   "Attempt to use reserved filter action should return EINVAL");
-
-       kvm_vm_free(vm);
-}
-
-
-/* Test that overlapping configurations of the SMCCC filter are rejected */
-static void test_filter_overlap(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = setup_vm(&vcpu);
-       int r;
-
-       set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
-
-       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
-       TEST_ASSERT(r < 0 && errno == EEXIST,
-                   "Attempt to filter already configured range should return EEXIST");
-
-       kvm_vm_free(vm);
-}
-
-static void expect_call_denied(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) != UCALL_SYNC)
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-
-       TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
-                   "Unexpected SMCCC return code: %lu", uc.args[1]);
-}
-
-/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
-static void test_filter_denied(void)
-{
-       enum smccc_conduit conduit;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       for_each_conduit(conduit) {
-               vm = setup_vm(&vcpu);
-
-               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
-               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               vcpu_run(vcpu);
-               expect_call_denied(vcpu);
-
-               kvm_vm_free(vm);
-       }
-}
-
-static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
-                                   enum smccc_conduit conduit)
-{
-       struct kvm_run *run = vcpu->run;
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
-                   "Unexpected exit reason: %u", run->exit_reason);
-       TEST_ASSERT(run->hypercall.nr == func_id,
-                   "Unexpected SMCCC function: %llu", run->hypercall.nr);
-
-       if (conduit == SMC_INSN)
-               TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
-                           "KVM_HYPERCALL_EXIT_SMC is not set");
-       else
-               TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
-                           "KVM_HYPERCALL_EXIT_SMC is set");
-}
-
-/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
-static void test_filter_fwd_to_user(void)
-{
-       enum smccc_conduit conduit;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       for_each_conduit(conduit) {
-               vm = setup_vm(&vcpu);
-
-               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
-               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               vcpu_run(vcpu);
-               expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
-
-               kvm_vm_free(vm);
-       }
-}
-
-static bool kvm_supports_smccc_filter(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       int r;
-
-       r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
-
-       kvm_vm_free(vm);
-       return !r;
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_supports_smccc_filter());
-
-       test_pad_must_be_zero();
-       test_invalid_nr_functions();
-       test_overflow_nr_functions();
-       test_reserved_action();
-       test_filter_reserved_range();
-       test_filter_overlap();
-       test_filter_denied();
-       test_filter_fwd_to_user();
-}
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
deleted file mode 100644 (file)
index 80b74c6..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
- *
- * Copyright (c) 2022 Google LLC.
- *
- * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
- * or all 32bit vCPUs) can be configured, and that mixed-width vCPUs cannot
- * be configured.
- */
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-
-/*
- * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, then add another vCPU
- * and run KVM_ARM_VCPU_INIT with @init1.
- */
-static int add_init_2vcpus(struct kvm_vcpu_init *init0,
-                          struct kvm_vcpu_init *init1)
-{
-       struct kvm_vcpu *vcpu0, *vcpu1;
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       vcpu0 = __vm_vcpu_add(vm, 0);
-       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
-       if (ret)
-               goto free_exit;
-
-       vcpu1 = __vm_vcpu_add(vm, 1);
-       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
-       kvm_vm_free(vm);
-       return ret;
-}
-
-/*
- * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
- * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
- */
-static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
-                                 struct kvm_vcpu_init *init1)
-{
-       struct kvm_vcpu *vcpu0, *vcpu1;
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       vcpu0 = __vm_vcpu_add(vm, 0);
-       vcpu1 = __vm_vcpu_add(vm, 1);
-
-       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
-       if (ret)
-               goto free_exit;
-
-       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
-
-free_exit:
-       kvm_vm_free(vm);
-       return ret;
-}
-
-/*
- * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
- * configured, and two mixed-width vCPUs cannot be configured.
- * For each of those three cases, vCPUs are configured in two different
- * orders.  One order runs KVM_CREATE_VCPU for 2 vCPUs and then runs
- * KVM_ARM_VCPU_INIT for both of them.  The other runs KVM_CREATE_VCPU and
- * KVM_ARM_VCPU_INIT for one vCPU, and then runs those commands for the
- * other vCPU.
- */
-int main(void)
-{
-       struct kvm_vcpu_init init0, init1;
-       struct kvm_vm *vm;
-       int ret;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
-
-       /* Get the preferred target type and copy that to init1 for later use */
-       vm = vm_create_barebones();
-       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
-       kvm_vm_free(vm);
-       init1 = init0;
-
-       /* Test with 64bit vCPUs */
-       ret = add_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
-
-       /* Test with 32bit vCPUs */
-       init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
-       ret = add_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init0);
-       TEST_ASSERT(ret == 0,
-                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
-
-       /* Test with mixed-width vCPUs  */
-       init0.features[0] = 0;
-       init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
-       ret = add_init_2vcpus(&init0, &init1);
-       TEST_ASSERT(ret != 0,
-                   "Configuring mixed-width vCPUs worked unexpectedly");
-       ret = add_2vcpus_init_2vcpus(&init0, &init1);
-       TEST_ASSERT(ret != 0,
-                   "Configuring mixed-width vCPUs worked unexpectedly");
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
deleted file mode 100644 (file)
index b3b5fb0..0000000
+++ /dev/null
@@ -1,764 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic init sequence tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <linux/kernel.h>
-#include <sys/syscall.h>
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vgic.h"
-
-#define NR_VCPUS               4
-
-#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-
-#define GICR_TYPER 0x8
-
-#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
-#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
-
-struct vm_gic {
-       struct kvm_vm *vm;
-       int gic_fd;
-       uint32_t gic_dev_type;
-};
-
-static uint64_t max_phys_size;
-
-/*
- * Helpers to access a redistributor register and verify the ioctl() failed or
- * succeeded as expected, and provided the correct value on success.
- */
-static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
-                                   int want, const char *msg)
-{
-       uint32_t ignored_val;
-       int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
-                                       REG_OFFSET(vcpu, offset), &ignored_val);
-
-       TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
-}
-
-static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
-                             const char *msg)
-{
-       uint32_t val;
-
-       kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
-                           REG_OFFSET(vcpu, offset), &val);
-       TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
-}
-
-/* dummy guest code */
-static void guest_code(void)
-{
-       GUEST_SYNC(0);
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
-       GUEST_DONE();
-}
-
-/* We don't want to assert on run execution, hence this helper. */
-static int run_vcpu(struct kvm_vcpu *vcpu)
-{
-       return __vcpu_run(vcpu) ? -errno : 0;
-}
-
-static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
-                                             uint32_t nr_vcpus,
-                                             struct kvm_vcpu *vcpus[])
-{
-       struct vm_gic v;
-
-       v.gic_dev_type = gic_dev_type;
-       v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       return v;
-}
-
-static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
-{
-       struct vm_gic v;
-
-       v.gic_dev_type = gic_dev_type;
-       v.vm = vm_create_barebones();
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       return v;
-}
-
-
-static void vm_gic_destroy(struct vm_gic *v)
-{
-       close(v->gic_fd);
-       kvm_vm_free(v->vm);
-}
-
-struct vgic_region_attr {
-       uint64_t attr;
-       uint64_t size;
-       uint64_t alignment;
-};
-
-struct vgic_region_attr gic_v3_dist_region = {
-       .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
-       .size = 0x10000,
-       .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v3_redist_region = {
-       .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
-       .size = NR_VCPUS * 0x20000,
-       .alignment = 0x10000,
-};
-
-struct vgic_region_attr gic_v2_dist_region = {
-       .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
-       .size = 0x1000,
-       .alignment = 0x1000,
-};
-
-struct vgic_region_attr gic_v2_cpu_region = {
-       .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
-       .size = 0x2000,
-       .alignment = 0x1000,
-};
-
-/**
- * Helper routine that performs general KVM device tests. Eventually the
- * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
- * DIST/REDIST (or DIST/CPUIF for GICv2). The assumption is that 4 vCPUs are
- * going to be used, hence the overlap. In the GICv3 case, an RDIST region is
- * set at 0x0 and a DIST region at 0x70000. The GICv2 case sets a CPUIF at 0x0
- * and a DIST region at 0x1000.
- */
-static void subtest_dist_rdist(struct vm_gic *v)
-{
-       int ret;
-       uint64_t addr;
-       struct vgic_region_attr rdist; /* CPU interface in GICv2 */
-       struct vgic_region_attr dist;
-
-       rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
-                                               : gic_v2_cpu_region;
-       dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
-                                               : gic_v2_dist_region;
-
-       /* Check existing group/attributes */
-       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
-
-       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
-
-       /* check non existing attribute */
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
-       TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
-
-       /* misaligned DIST and REDIST address settings */
-       addr = dist.alignment / 0x10;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   dist.attr, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
-
-       addr = rdist.alignment / 0x10;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
-
-       /* out of range address */
-       addr = max_phys_size;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   dist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
-
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
-
-       /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
-       addr = max_phys_size - dist.alignment;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                       "half of the redist is beyond IPA limit");
-
-       /* set REDIST base address @0x0 */
-       addr = 0x00000;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           rdist.attr, &addr);
-
-       /* Attempt to create a second legacy redistributor region */
-       addr = 0xE0000;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   rdist.attr, &addr);
-       TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
-
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                    KVM_VGIC_V3_ADDR_TYPE_REDIST);
-       if (!ret) {
-               /* Attempt to mix legacy and new redistributor regions */
-               addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
-               ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-               TEST_ASSERT(ret && errno == EINVAL,
-                           "attempt to mix GICv3 REDIST and REDIST_REGION");
-       }
-
-       /*
-        * Set overlapping DIST / REDIST; this cannot be detected here and will
-        * instead be detected on the first vcpu run.
-        */
-       addr = rdist.size - rdist.alignment;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           dist.attr, &addr);
-}
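The REDIST_REGION_ATTR_ADDR() helper used above and throughout the following
subtests packs the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute value. The
real helper lives in the selftests vgic header; roughly, following the layout
described in the KVM vGIC-v3 device documentation:

        /*
         * bits [63:52]: redistributor count, bits [51:16]: base address >> 16,
         * bits [15:12]: flags, bits [11:0]: region index.
         */
        #define REDIST_REGION_ATTR_ADDR(count, base, flags, index)     \
                (((uint64_t)(count) << 52) |                            \
                 ((uint64_t)((base) >> 16) << 16) |                     \
                 ((uint64_t)(flags) << 12) |                            \
                 (index))

So REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0), for example, describes a region
at 0x200000 with room for two redistributors, registered as region index 0.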
-
-/* Test the new REDIST region API */
-static void subtest_v3_redist_regions(struct vm_gic *v)
-{
-       uint64_t addr, expected_addr;
-       int ret;
-
-       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST);
-       TEST_ASSERT(!ret, "Multiple redist regions advertised");
-
-       addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count == 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "attempt to register the first rdist region with index != 0");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "register an rdist region overlapping with another one");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                   "register redist region with base address beyond IPA range");
-
-       /* The last redist is above the pa range. */
-       addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-                   "register redist region with top address beyond IPA range");
-
-       addr = 0x260000;
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
-
-       /*
-        * Now there are 2 redist regions:
-        * region 0 @ 0x200000 2 redists
-        * region 1 @ 0x240000 1 redist
-        * Attempt to read their characteristics
-        */
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
-       expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
-       expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
-
-       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
-       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non-existent region");
-
-       addr = 0x260000;
-       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
-       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
-}
-
-/*
- * VGIC KVM device is created and initialized before the secondary CPUs
- * get created
- */
-static void test_vgic_then_vcpus(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret, i;
-
-       v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
-
-       subtest_dist_rdist(&v);
-
-       /* Add the rest of the VCPUs */
-       for (i = 1; i < NR_VCPUS; ++i)
-               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-/* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret;
-
-       v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
-
-       subtest_dist_rdist(&v);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-#define KVM_VGIC_V2_ATTR(offset, cpu) \
-       (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
-        FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
-
-#define GIC_CPU_CTRL   0x00
-
-static void test_v2_uaccess_cpuif_no_vcpus(void)
-{
-       struct vm_gic v;
-       u64 val = 0;
-       int ret;
-
-       v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
-       subtest_dist_rdist(&v);
-
-       ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-       ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
-                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
-       TEST_ASSERT(ret && errno == EINVAL,
-                   "accessed non-existent CPU interface, want errno: %i",
-                   EINVAL);
-
-       vm_gic_destroy(&v);
-}
-
-static void test_v3_new_redist_regions(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       void *dummy = NULL;
-       struct vm_gic v;
-       uint64_t addr;
-       int ret;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
-       vm_gic_destroy(&v);
-
-       /* step2 */
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
-
-       vm_gic_destroy(&v);
-
-       /* step 3 */
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       subtest_v3_redist_regions(&v);
-
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
-       TEST_ASSERT(ret && errno == EFAULT,
-                   "register a third region allowing to cover the 4 vcpus");
-
-       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       ret = run_vcpu(vcpus[3]);
-       TEST_ASSERT(!ret, "vcpu run");
-
-       vm_gic_destroy(&v);
-}
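The GICR_TYPER values expected in the next subtests pack two pieces of the
32-bit register: bits [23:8] hold the redistributor's processor number and
bit 4 is the Last flag, set on the last redistributor of a region (a sketch
assuming the architectural GICR_TYPER layout):

        /* vcpu 3, last redistributor in its region */
        uint32_t typer = (3 << 8) | (1 << 4);   /* == 0x310 */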
-
-static void test_v3_typer_accesses(void)
-{
-       struct vm_gic v;
-       uint64_t addr;
-       int ret, i;
-
-       v.vm = vm_create(NR_VCPUS);
-       (void)vm_vcpu_add(v.vm, 0, guest_code);
-
-       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
-       (void)vm_vcpu_add(v.vm, 3, guest_code);
-
-       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
-                               "attempting to read GICR_TYPER of a not-yet-created vcpu");
-
-       (void)vm_vcpu_add(v.vm, 1, guest_code);
-
-       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
-                               "read GICR_TYPER before GIC initialized");
-
-       (void)vm_vcpu_add(v.vm, 2, guest_code);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       for (i = 0; i < NR_VCPUS ; i++) {
-               v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
-                                 "read GICR_TYPER before rdist region setting");
-       }
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       /* The first 2 rdists should be put there (vcpus 0 and 3) */
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
-
-       addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
-       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-       TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
-
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
-                         "no redist region attached to vcpu #1 yet, last cannot be returned");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
-                         "no redist region attached to vcpu #2, last cannot be returned");
-
-       addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
-                         "read typer of rdist #1, last properly returned");
-
-       vm_gic_destroy(&v);
-}
-
-static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
-                                                  uint32_t vcpuids[])
-{
-       struct vm_gic v;
-       int i;
-
-       v.vm = vm_create(nr_vcpus);
-       for (i = 0; i < nr_vcpus; i++)
-               vm_vcpu_add(v.vm, vcpuids[i], guest_code);
-
-       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-
-       return v;
-}
-
-/**
- * Test GICR_TYPER last bit with new redist regions
- * rdist regions #1 and #2 are contiguous
- * rdist region #0 @0x100000 2 rdist capacity
- *     rdists: 0, 3 (Last)
- * rdist region #1 @0x240000 2 rdist capacity
- *     rdists:  5, 4 (Last)
- * rdist region #2 @0x200000 2 rdist capacity
- *     rdists: 1, 2
- */
-static void test_v3_last_bit_redist_regions(void)
-{
-       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
-       struct vm_gic v;
-       uint64_t addr;
-
-       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
-       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
-       v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
-
-       vm_gic_destroy(&v);
-}
-
-/* Test last bit with legacy region */
-static void test_v3_last_bit_single_rdist(void)
-{
-       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
-       struct vm_gic v;
-       uint64_t addr;
-
-       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       addr = 0x10000;
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
-       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
-       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
-       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
-       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
-       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #4");
-
-       vm_gic_destroy(&v);
-}
-
-/* Uses the legacy REDIST region API. */
-static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       int ret, i;
-       uint64_t addr;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
-
-       /* Set space for 3 redists; we have 1 vcpu, so this succeeds. */
-       addr = max_phys_size - (3 * 2 * 0x10000);
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
-
-       addr = 0x00000;
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
-
-       /* Add the rest of the VCPUs */
-       for (i = 1; i < NR_VCPUS; ++i)
-               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
-
-       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       /* Attempt to run a vcpu without enough redist space. */
-       ret = run_vcpu(vcpus[2]);
-       TEST_ASSERT(ret && errno == EINVAL,
-               "redist base+size above PA range detected on 1st vcpu run");
-
-       vm_gic_destroy(&v);
-}
-
-static void test_v3_its_region(void)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       uint64_t addr;
-       int its_fd, ret;
-
-       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
-       its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
-
-       addr = 0x401000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == EINVAL,
-               "ITS region with misaligned address");
-
-       addr = max_phys_size;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-               "register ITS region with base address beyond IPA range");
-
-       addr = max_phys_size - 0x10000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == E2BIG,
-               "Half of ITS region is beyond IPA range");
-
-       /* This one succeeds setting the ITS base */
-       addr = 0x400000;
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_ITS_ADDR_TYPE, &addr);
-
-       addr = 0x300000;
-       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
-       TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
-
-       close(its_fd);
-       vm_gic_destroy(&v);
-}
-
-/*
- * Returns 0 if it's possible to create a GIC device of the given type (V2 or V3).
- */
-int test_kvm_device(uint32_t gic_dev_type)
-{
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       struct vm_gic v;
-       uint32_t other;
-       int ret;
-
-       v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
-
-       /* try to create a non-existing KVM device */
-       ret = __kvm_test_create_device(v.vm, 0);
-       TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
-
-       /* trial mode */
-       ret = __kvm_test_create_device(v.vm, gic_dev_type);
-       if (ret)
-               return ret;
-       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
-
-       ret = __kvm_create_device(v.vm, gic_dev_type);
-       TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
-
-       /* try to create the other gic_dev_type */
-       other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
-                                            : KVM_DEV_TYPE_ARM_VGIC_V2;
-
-       if (!__kvm_test_create_device(v.vm, other)) {
-               ret = __kvm_create_device(v.vm, other);
-               TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
-                               "create GIC device while other version exists");
-       }
-
-       vm_gic_destroy(&v);
-
-       return 0;
-}
-
-void run_tests(uint32_t gic_dev_type)
-{
-       test_vcpus_then_vgic(gic_dev_type);
-       test_vgic_then_vcpus(gic_dev_type);
-
-       if (VGIC_DEV_IS_V2(gic_dev_type))
-               test_v2_uaccess_cpuif_no_vcpus();
-
-       if (VGIC_DEV_IS_V3(gic_dev_type)) {
-               test_v3_new_redist_regions();
-               test_v3_typer_accesses();
-               test_v3_last_bit_redist_regions();
-               test_v3_last_bit_single_rdist();
-               test_v3_redist_ipa_range_check_at_vcpu_run();
-               test_v3_its_region();
-       }
-}
-
-int main(int ac, char **av)
-{
-       int ret;
-       int pa_bits;
-       int cnt_impl = 0;
-
-       pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
-       max_phys_size = 1ULL << pa_bits;
-
-       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
-       if (!ret) {
-               pr_info("Running GIC_v3 tests.\n");
-               run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
-               cnt_impl++;
-       }
-
-       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
-       if (!ret) {
-               pr_info("Running GIC_v2 tests.\n");
-               run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
-               cnt_impl++;
-       }
-
-       if (!cnt_impl) {
-               print_skip("No GICv2 nor GICv3 support");
-               exit(KSFT_SKIP);
-       }
-       return 0;
-}
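
A note on the magic GICR_TYPER values this test compares against: in the GICv3 architecture, GICR_TYPER bits [23:8] hold the Processor_Number and bit 4 is the Last flag that marks the final redistributor of a region, so 0x310 reads as "vCPU 3, last in its region" and 0x210 as "vCPU 2, last in its region". Below is a minimal decoding sketch assuming only that field layout; the helper is illustrative and is not part of the selftests.

#include <stdint.h>
#include <stdio.h>

/* GICR_TYPER fields used by the expected values above (GICv3 spec). */
#define GICR_TYPER_LAST		(1ULL << 4)
#define GICR_TYPER_PROC_SHIFT	8
#define GICR_TYPER_PROC_MASK	(0xffffULL << GICR_TYPER_PROC_SHIFT)

static void decode_gicr_typer(uint64_t typer)
{
	unsigned long cpu = (typer & GICR_TYPER_PROC_MASK) >> GICR_TYPER_PROC_SHIFT;

	printf("GICR_TYPER 0x%llx: Processor_Number=%lu Last=%d\n",
	       (unsigned long long)typer, cpu, !!(typer & GICR_TYPER_LAST));
}

int main(void)
{
	decode_gicr_typer(0x310);	/* vCPU 3, last rdist in its region */
	decode_gicr_typer(0x210);	/* vCPU 2, last rdist in its region */
	decode_gicr_typer(0x100);	/* vCPU 1, Last bit clear */
	return 0;
}
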
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
deleted file mode 100644 (file)
index f4ac28d..0000000
+++ /dev/null
@@ -1,847 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_irq.c - Test userspace injection of IRQs
- *
- * This test validates the injection of IRQs from userspace using various
- * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
- * host to inject a specific intid via a GUEST_SYNC call, and then checks that
- * it received it.
- */
-#include <asm/kvm.h>
-#include <asm/kvm_para.h>
-#include <sys/eventfd.h>
-#include <linux/sizes.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "vgic.h"
-
-/*
- * Stores the user specified args; it's passed to the guest and to every test
- * function.
- */
-struct test_args {
-       uint32_t nr_irqs; /* number of KVM supported IRQs. */
-       bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
-       bool level_sensitive; /* 1 is level, 0 is edge */
-       int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
-       bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
-};
-
-/*
- * KVM implements 32 priority levels:
- * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
- *
- * Note that these macros will still be correct in the case that KVM implements
- * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
- */
-#define KVM_NUM_PRIOS          32
-#define KVM_PRIO_SHIFT         3 /* steps of 8 = 1 << 3 */
-#define KVM_PRIO_STEPS         (1 << KVM_PRIO_SHIFT) /* 8 */
-#define LOWEST_PRIO            (KVM_NUM_PRIOS - 1)
-#define CPU_PRIO_MASK          (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
-#define IRQ_DEFAULT_PRIO       (LOWEST_PRIO - 1)
-#define IRQ_DEFAULT_PRIO_REG   (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
-
-/*
- * The kvm_inject_* utilities are used by the guest to ask the host to inject
- * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
- */
-
-typedef enum {
-       KVM_INJECT_EDGE_IRQ_LINE = 1,
-       KVM_SET_IRQ_LINE,
-       KVM_SET_IRQ_LINE_HIGH,
-       KVM_SET_LEVEL_INFO_HIGH,
-       KVM_INJECT_IRQFD,
-       KVM_WRITE_ISPENDR,
-       KVM_WRITE_ISACTIVER,
-} kvm_inject_cmd;
-
-struct kvm_inject_args {
-       kvm_inject_cmd cmd;
-       uint32_t first_intid;
-       uint32_t num;
-       int level;
-       bool expect_failure;
-};
-
-/* Used on the guest side to perform the hypercall. */
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
-               uint32_t num, int level, bool expect_failure);
-
-/* Used on the host side to get the hypercall info. */
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
-               struct kvm_inject_args *args);
-
-#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure)                     \
-       kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
-
-#define KVM_INJECT_MULTI(cmd, intid, num)                                      \
-       _KVM_INJECT_MULTI(cmd, intid, num, false)
-
-#define _KVM_INJECT(cmd, intid, expect_failure)                                        \
-       _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
-
-#define KVM_INJECT(cmd, intid)                                                 \
-       _KVM_INJECT_MULTI(cmd, intid, 1, false)
-
-#define KVM_ACTIVATE(cmd, intid)                                               \
-       kvm_inject_call(cmd, intid, 1, 1, false);
-
-struct kvm_inject_desc {
-       kvm_inject_cmd cmd;
-       /* can inject SGIs, PPIs, and/or SPIs. */
-       bool sgi, ppi, spi;
-};
-
-static struct kvm_inject_desc inject_edge_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_INJECT_EDGE_IRQ_LINE,             false, false, true },
-       { KVM_INJECT_IRQFD,                     false, false, true },
-       { KVM_WRITE_ISPENDR,                    true,  false, true },
-       { 0, },
-};
-
-static struct kvm_inject_desc inject_level_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_SET_IRQ_LINE_HIGH,                false, true,  true },
-       { KVM_SET_LEVEL_INFO_HIGH,              false, true,  true },
-       { KVM_INJECT_IRQFD,                     false, false, true },
-       { KVM_WRITE_ISPENDR,                    false, true,  true },
-       { 0, },
-};
-
-static struct kvm_inject_desc set_active_fns[] = {
-       /*                                      sgi    ppi    spi */
-       { KVM_WRITE_ISACTIVER,                  true,  true,  true },
-       { 0, },
-};
-
-#define for_each_inject_fn(t, f)                                               \
-       for ((f) = (t); (f)->cmd; (f)++)
-
-#define for_each_supported_inject_fn(args, t, f)                               \
-       for_each_inject_fn(t, f)                                                \
-               if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
-
-#define for_each_supported_activate_fn(args, t, f)                             \
-       for_each_supported_inject_fn((args), (t), (f))
-
-/* Shared between the guest main thread and the IRQ handlers. */
-volatile uint64_t irq_handled;
-volatile uint32_t irqnr_received[MAX_SPI + 1];
-
-static void reset_stats(void)
-{
-       int i;
-
-       irq_handled = 0;
-       for (i = 0; i <= MAX_SPI; i++)
-               irqnr_received[i] = 0;
-}
-
-static uint64_t gic_read_ap1r0(void)
-{
-       uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
-
-       dsb(sy);
-       return reg;
-}
-
-static void gic_write_ap1r0(uint64_t val)
-{
-       write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
-       isb();
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level);
-
-static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
-{
-       uint32_t intid = gic_get_and_ack_irq();
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       GUEST_ASSERT(gic_irq_get_active(intid));
-
-       if (!level_sensitive)
-               GUEST_ASSERT(!gic_irq_get_pending(intid));
-
-       if (level_sensitive)
-               guest_set_irq_line(intid, 0);
-
-       GUEST_ASSERT(intid < MAX_SPI);
-       irqnr_received[intid] += 1;
-       irq_handled += 1;
-
-       gic_set_eoi(intid);
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       if (eoi_split)
-               gic_set_dir(intid);
-
-       GUEST_ASSERT(!gic_irq_get_active(intid));
-       GUEST_ASSERT(!gic_irq_get_pending(intid));
-}
-
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
-               uint32_t num, int level, bool expect_failure)
-{
-       struct kvm_inject_args args = {
-               .cmd = cmd,
-               .first_intid = first_intid,
-               .num = num,
-               .level = level,
-               .expect_failure = expect_failure,
-       };
-       GUEST_SYNC(&args);
-}
-
-#define GUEST_ASSERT_IAR_EMPTY()                                               \
-do {                                                                           \
-       uint32_t _intid;                                                        \
-       _intid = gic_get_and_ack_irq();                                         \
-       GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS);                    \
-} while (0)
-
-#define CAT_HELPER(a, b) a ## b
-#define CAT(a, b) CAT_HELPER(a, b)
-#define PREFIX guest_irq_handler_
-#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
-#define GENERATE_GUEST_IRQ_HANDLER(split, lev)                                 \
-static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs)                 \
-{                                                                              \
-       guest_irq_generic_handler(split, lev);                                  \
-}
-
-GENERATE_GUEST_IRQ_HANDLER(0, 0);
-GENERATE_GUEST_IRQ_HANDLER(0, 1);
-GENERATE_GUEST_IRQ_HANDLER(1, 0);
-GENERATE_GUEST_IRQ_HANDLER(1, 1);
-
-static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
-       {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
-       {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
-};
-
-static void reset_priorities(struct test_args *args)
-{
-       int i;
-
-       for (i = 0; i < args->nr_irqs; i++)
-               gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
-}
-
-static void guest_set_irq_line(uint32_t intid, uint32_t level)
-{
-       kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
-}
-
-static void test_inject_fail(struct test_args *args,
-               uint32_t intid, kvm_inject_cmd cmd)
-{
-       reset_stats();
-
-       _KVM_INJECT(cmd, intid, true);
-       /* no IRQ to handle on entry */
-
-       GUEST_ASSERT_EQ(irq_handled, 0);
-       GUEST_ASSERT_IAR_EMPTY();
-}
-
-static void guest_inject(struct test_args *args,
-               uint32_t first_intid, uint32_t num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t i;
-
-       reset_stats();
-
-       /* Cycle over all priorities to make things more interesting. */
-       for (i = first_intid; i < num + first_intid; i++)
-               gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
-
-       asm volatile("msr daifset, #2" : : : "memory");
-       KVM_INJECT_MULTI(cmd, first_intid, num);
-
-       while (irq_handled < num) {
-               wfi();
-               local_irq_enable();
-               isb(); /* handle IRQ */
-               local_irq_disable();
-       }
-       local_irq_enable();
-
-       GUEST_ASSERT_EQ(irq_handled, num);
-       for (i = first_intid; i < num + first_intid; i++)
-               GUEST_ASSERT_EQ(irqnr_received[i], 1);
-       GUEST_ASSERT_IAR_EMPTY();
-
-       reset_priorities(args);
-}
-
-/*
- * Restore the active state of multiple concurrent IRQs (given by
- * concurrent_irqs).  This does what a live-migration would do on the
- * destination side assuming there are some active IRQs that were not
- * deactivated yet.
- */
-static void guest_restore_active(struct test_args *args,
-               uint32_t first_intid, uint32_t num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t prio, intid, ap1r;
-       int i;
-
-       /*
-        * Set the priorities of the IRQs in descending order, so intid+1
-        * can preempt intid.
-        */
-       for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
-               GUEST_ASSERT(prio >= 0);
-               intid = i + first_intid;
-               gic_set_priority(intid, prio);
-       }
-
-       /*
-        * In a real migration, KVM would restore all GIC state before running
-        * guest code.
-        */
-       for (i = 0; i < num; i++) {
-               intid = i + first_intid;
-               KVM_ACTIVATE(cmd, intid);
-               ap1r = gic_read_ap1r0();
-               ap1r |= 1U << i;
-               gic_write_ap1r0(ap1r);
-       }
-
-       /* This is where the "migration" would occur. */
-
-       /* finish handling the IRQs starting with the highest priority one. */
-       for (i = 0; i < num; i++) {
-               intid = num - i - 1 + first_intid;
-               gic_set_eoi(intid);
-               if (args->eoi_split)
-                       gic_set_dir(intid);
-       }
-
-       for (i = 0; i < num; i++)
-               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       GUEST_ASSERT_IAR_EMPTY();
-}
-
-/*
- * Polls the IAR until it's not a spurious interrupt.
- *
- * This function should only be used in test_inject_preemption (with IRQs
- * masked).
- */
-static uint32_t wait_for_and_activate_irq(void)
-{
-       uint32_t intid;
-
-       do {
-               asm volatile("wfi" : : : "memory");
-               intid = gic_get_and_ack_irq();
-       } while (intid == IAR_SPURIOUS);
-
-       return intid;
-}
-
-/*
- * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
- * handle them without handling the actual exceptions.  This is done by masking
- * interrupts for the whole test.
- */
-static void test_inject_preemption(struct test_args *args,
-               uint32_t first_intid, int num,
-               kvm_inject_cmd cmd)
-{
-       uint32_t intid, prio, step = KVM_PRIO_STEPS;
-       int i;
-
-       /*
-        * Set the priorities of the IRQs in descending order, so intid+1
-        * can preempt intid.
-        */
-       for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
-               GUEST_ASSERT(prio >= 0);
-               intid = i + first_intid;
-               gic_set_priority(intid, prio);
-       }
-
-       local_irq_disable();
-
-       for (i = 0; i < num; i++) {
-               uint32_t tmp;
-               intid = i + first_intid;
-               KVM_INJECT(cmd, intid);
-               /* Each successive IRQ will preempt the previous one. */
-               tmp = wait_for_and_activate_irq();
-               GUEST_ASSERT_EQ(tmp, intid);
-               if (args->level_sensitive)
-                       guest_set_irq_line(intid, 0);
-       }
-
-       /* finish handling the IRQs starting with the highest priority one. */
-       for (i = 0; i < num; i++) {
-               intid = num - i - 1 + first_intid;
-               gic_set_eoi(intid);
-               if (args->eoi_split)
-                       gic_set_dir(intid);
-       }
-
-       local_irq_enable();
-
-       for (i = 0; i < num; i++)
-               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
-       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
-       GUEST_ASSERT_IAR_EMPTY();
-
-       reset_priorities(args);
-}
-
-static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
-{
-       uint32_t nr_irqs = args->nr_irqs;
-
-       if (f->sgi) {
-               guest_inject(args, MIN_SGI, 1, f->cmd);
-               guest_inject(args, 0, 16, f->cmd);
-       }
-
-       if (f->ppi)
-               guest_inject(args, MIN_PPI, 1, f->cmd);
-
-       if (f->spi) {
-               guest_inject(args, MIN_SPI, 1, f->cmd);
-               guest_inject(args, nr_irqs - 1, 1, f->cmd);
-               guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
-       }
-}
-
-static void test_injection_failure(struct test_args *args,
-               struct kvm_inject_desc *f)
-{
-       uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
-               test_inject_fail(args, bad_intid[i], f->cmd);
-}
-
-static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
-{
-       /*
-        * Test up to 4 levels of preemption. The reason is that KVM doesn't
-        * currently implement the ability to have more than the number-of-LRs
-        * number of concurrently active IRQs. The number of LRs implemented is
-        * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
-        */
-       if (f->sgi)
-               test_inject_preemption(args, MIN_SGI, 4, f->cmd);
-
-       if (f->ppi)
-               test_inject_preemption(args, MIN_PPI, 4, f->cmd);
-
-       if (f->spi)
-               test_inject_preemption(args, MIN_SPI, 4, f->cmd);
-}
-
-static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
-{
-       /* Test up to 4 active IRQs. Same reason as in test_preemption. */
-       if (f->sgi)
-               guest_restore_active(args, MIN_SGI, 4, f->cmd);
-
-       if (f->ppi)
-               guest_restore_active(args, MIN_PPI, 4, f->cmd);
-
-       if (f->spi)
-               guest_restore_active(args, MIN_SPI, 4, f->cmd);
-}
-
-static void guest_code(struct test_args *args)
-{
-       uint32_t i, nr_irqs = args->nr_irqs;
-       bool level_sensitive = args->level_sensitive;
-       struct kvm_inject_desc *f, *inject_fns;
-
-       gic_init(GIC_V3, 1);
-
-       for (i = 0; i < nr_irqs; i++)
-               gic_irq_enable(i);
-
-       for (i = MIN_SPI; i < nr_irqs; i++)
-               gic_irq_set_config(i, !level_sensitive);
-
-       gic_set_eoi_split(args->eoi_split);
-
-       reset_priorities(args);
-       gic_set_priority_mask(CPU_PRIO_MASK);
-
-       inject_fns  = level_sensitive ? inject_level_fns
-                                     : inject_edge_fns;
-
-       local_irq_enable();
-
-       /* Start the tests. */
-       for_each_supported_inject_fn(args, inject_fns, f) {
-               test_injection(args, f);
-               test_preemption(args, f);
-               test_injection_failure(args, f);
-       }
-
-       /*
-        * Restore the active state of IRQs. This would happen when live
-        * migrating IRQs in the middle of being handled.
-        */
-       for_each_supported_activate_fn(args, set_active_fns, f)
-               test_restore_active(args, f);
-
-       GUEST_DONE();
-}
-
-static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
-                       struct test_args *test_args, bool expect_failure)
-{
-       int ret;
-
-       if (!expect_failure) {
-               kvm_arm_irq_line(vm, intid, level);
-       } else {
-               /* The interface doesn't allow larger intids. */
-               if (intid > KVM_ARM_IRQ_NUM_MASK)
-                       return;
-
-               ret = _kvm_arm_irq_line(vm, intid, level);
-               TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %i did not cause KVM_IRQ_LINE "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-       }
-}
-
-void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
-                       bool expect_failure)
-{
-       if (!expect_failure) {
-               kvm_irq_set_level_info(gic_fd, intid, level);
-       } else {
-               int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
-               /*
-                * The kernel silently fails for invalid SPIs and SGIs (which
-                * are not level-sensitive). It only checks that the intid doesn't
-                * spill over 1U << 10 (the max reserved SPI). Also, callers
-                * are supposed to mask the intid with 0x3ff (1023).
-                */
-               if (intid > VGIC_MAX_RESERVED)
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-               else
-                       TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
-                               "for intid %i failed, rc: %i errno: %i",
-                               intid, ret, errno);
-       }
-}
-
-static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
-               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
-               bool expect_failure)
-{
-       struct kvm_irq_routing *routing;
-       int ret;
-       uint64_t i;
-
-       assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
-
-       routing = kvm_gsi_routing_create();
-       for (i = intid; i < (uint64_t)intid + num; i++)
-               kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
-
-       if (!expect_failure) {
-               kvm_gsi_routing_write(vm, routing);
-       } else {
-               ret = _kvm_gsi_routing_write(vm, routing);
-               /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
-               if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
-                       TEST_ASSERT(ret != 0 && errno == EINVAL,
-                               "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
-                               "error: rc: %i errno: %i", intid, ret, errno);
-               else
-                       TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
-                               "for intid %i failed, rc: %i errno: %i",
-                               intid, ret, errno);
-       }
-}
-
-static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
-                                       struct kvm_vcpu *vcpu,
-                                       bool expect_failure)
-{
-       /*
-        * Ignore this when expecting failure, as invalid intids would lead to
-        * either trying to inject SGIs when we configured the test to be
-        * level_sensitive (or the reverse), or injecting large intids, which
-        * would lead to writing above the ISPENDR register space (and we
-        * don't want to do that either).
-        */
-       if (!expect_failure)
-               kvm_irq_write_ispendr(gic_fd, intid, vcpu);
-}
-
-static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
-               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
-               bool expect_failure)
-{
-       int fd[MAX_SPI];
-       uint64_t val;
-       int ret, f;
-       uint64_t i;
-
-       /*
-        * There is no way to try injecting an SGI or PPI as the interface
-        * starts counting from the first SPI (above the private ones), so just
-        * exit.
-        */
-       if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
-               return;
-
-       kvm_set_gsi_routing_irqchip_check(vm, intid, num,
-                       kvm_max_routes, expect_failure);
-
-       /*
-        * If expect_failure, just inject anyway. These injections will
-        * silently fail. And in any case, the guest will check that no
-        * actual interrupt was injected for those cases.
-        */
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               fd[f] = eventfd(0, 0);
-               TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               struct kvm_irqfd irqfd = {
-                       .fd  = fd[f],
-                       .gsi = i - MIN_SPI,
-               };
-               assert(i <= (uint64_t)UINT_MAX);
-               vm_ioctl(vm, KVM_IRQFD, &irqfd);
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-               val = 1;
-               ret = write(fd[f], &val, sizeof(uint64_t));
-               TEST_ASSERT(ret == sizeof(uint64_t),
-                           __KVM_SYSCALL_ERROR("write()", ret));
-       }
-
-       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
-               close(fd[f]);
-}
-
-/* handles the valid case: intid=0xffffffff num=1 */
-#define for_each_intid(first, num, tmp, i)                                     \
-       for ((tmp) = (i) = (first);                                             \
-               (tmp) < (uint64_t)(first) + (uint64_t)(num);                    \
-               (tmp)++, (i)++)
-
-static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
-                         struct kvm_inject_args *inject_args,
-                         struct test_args *test_args)
-{
-       kvm_inject_cmd cmd = inject_args->cmd;
-       uint32_t intid = inject_args->first_intid;
-       uint32_t num = inject_args->num;
-       int level = inject_args->level;
-       bool expect_failure = inject_args->expect_failure;
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t tmp;
-       uint32_t i;
-
-       /* handles the valid case: intid=0xffffffff num=1 */
-       assert(intid < UINT_MAX - num || num == 1);
-
-       switch (cmd) {
-       case KVM_INJECT_EDGE_IRQ_LINE:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 1, test_args,
-                                       expect_failure);
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 0, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_IRQ_LINE:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, level, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_IRQ_LINE_HIGH:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_line_check(vm, i, 1, test_args,
-                                       expect_failure);
-               break;
-       case KVM_SET_LEVEL_INFO_HIGH:
-               for_each_intid(intid, num, tmp, i)
-                       kvm_irq_set_level_info_check(gic_fd, i, 1,
-                                       expect_failure);
-               break;
-       case KVM_INJECT_IRQFD:
-               kvm_routing_and_irqfd_check(vm, intid, num,
-                                       test_args->kvm_max_routes,
-                                       expect_failure);
-               break;
-       case KVM_WRITE_ISPENDR:
-               for (i = intid; i < intid + num; i++)
-                       kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
-                                                   expect_failure);
-               break;
-       case KVM_WRITE_ISACTIVER:
-               for (i = intid; i < intid + num; i++)
-                       kvm_irq_write_isactiver(gic_fd, i, vcpu);
-               break;
-       default:
-               break;
-       }
-}
-
-static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
-               struct kvm_inject_args *args)
-{
-       struct kvm_inject_args *kvm_args_hva;
-       vm_vaddr_t kvm_args_gva;
-
-       kvm_args_gva = uc->args[1];
-       kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
-       memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
-}
-
-static void print_args(struct test_args *args)
-{
-       printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
-                       args->nr_irqs, args->level_sensitive,
-                       args->eoi_split);
-}
-
-static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
-{
-       struct ucall uc;
-       int gic_fd;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_inject_args inject_args;
-       vm_vaddr_t args_gva;
-
-       struct test_args args = {
-               .nr_irqs = nr_irqs,
-               .level_sensitive = level_sensitive,
-               .eoi_split = eoi_split,
-               .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
-               .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
-       };
-
-       print_args(&args);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vcpu);
-
-       /* Set up the guest args page (so it gets the args). */
-       args_gva = vm_vaddr_alloc_page(vm);
-       memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
-       vcpu_args_set(vcpu, 1, args_gva);
-
-       gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
-
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
-               guest_irq_handlers[args.eoi_split][args.level_sensitive]);
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       kvm_inject_get_call(vm, &uc, &inject_args);
-                       run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       close(gic_fd);
-       kvm_vm_free(vm);
-}
-
-static void help(const char *name)
-{
-       printf(
-       "\n"
-       "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
-       printf(" -n: specify number of IRQs to set up the vgic with. "
-               "It has to be a multiple of 32 and between 64 and 1024.\n");
-       printf(" -e: if 1 then EOI is split into a write to DIR on top "
-               "of writing EOI.\n");
-       printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
-       puts("");
-       exit(1);
-}
-
-int main(int argc, char **argv)
-{
-       uint32_t nr_irqs = 64;
-       bool default_args = true;
-       bool level_sensitive = false;
-       int opt;
-       bool eoi_split = false;
-
-       while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
-               switch (opt) {
-               case 'n':
-                       nr_irqs = atoi_non_negative("Number of IRQs", optarg);
-                       if (nr_irqs > 1024 || nr_irqs % 32)
-                               help(argv[0]);
-                       break;
-               case 'e':
-                       eoi_split = (bool)atoi_paranoid(optarg);
-                       default_args = false;
-                       break;
-               case 'l':
-                       level_sensitive = (bool)atoi_paranoid(optarg);
-                       default_args = false;
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       /*
-        * If the user only specified nr_irqs (or nothing at all), run all
-        * combinations of eoi_split and level_sensitive.
-        */
-       if (default_args) {
-               test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
-               test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
-               test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
-               test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
-       } else {
-               test_vgic(nr_irqs, level_sensitive, eoi_split);
-       }
-
-       return 0;
-}
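
For the irqfd cases exercised by kvm_routing_and_irqfd_check() above: userspace first installs a GSI routing table that maps a GSI to an SPI, then binds an eventfd to the GSI with KVM_IRQFD, and every write of a non-zero value to the eventfd requests one injection. A minimal user-space sketch of the bind-and-trigger step follows, assuming vm_fd is a KVM VM fd that already has an in-kernel GICv3 and a routing entry for the GSI; the helper name is made up and error handling is reduced to return codes.

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Bind an eventfd to @gsi and pulse it once; returns 0 on success. */
static int pulse_gsi_via_irqfd(int vm_fd, uint32_t gsi)
{
	struct kvm_irqfd irqfd = { .gsi = gsi };
	uint64_t val = 1;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	irqfd.fd = efd;
	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
		close(efd);
		return -1;
	}

	/* Each write of a non-zero value requests one injection. */
	if (write(efd, &val, sizeof(val)) != sizeof(val)) {
		close(efd);
		return -1;
	}

	close(efd);
	return 0;
}

The selftest's version differs mainly in that it creates one eventfd per SPI in the burst and asserts on every syscall.
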
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
deleted file mode 100644 (file)
index fc4fe52..0000000
+++ /dev/null
@@ -1,410 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * vgic_lpi_stress - Stress test for KVM's ITS emulation
- *
- * Copyright (c) 2024 Google LLC
- */
-
-#include <linux/sizes.h>
-#include <pthread.h>
-#include <stdatomic.h>
-#include <sys/sysinfo.h>
-
-#include "kvm_util.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_v3_its.h"
-#include "processor.h"
-#include "ucall.h"
-#include "vgic.h"
-
-#define TEST_MEMSLOT_INDEX     1
-
-#define GIC_LPI_OFFSET 8192
-
-static size_t nr_iterations = 1000;
-static vm_paddr_t gpa_base;
-
-static struct kvm_vm *vm;
-static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
-
-static struct test_data {
-       bool            request_vcpus_stop;
-       u32             nr_cpus;
-       u32             nr_devices;
-       u32             nr_event_ids;
-
-       vm_paddr_t      device_table;
-       vm_paddr_t      collection_table;
-       vm_paddr_t      cmdq_base;
-       void            *cmdq_base_va;
-       vm_paddr_t      itt_tables;
-
-       vm_paddr_t      lpi_prop_table;
-       vm_paddr_t      lpi_pend_tables;
-} test_data =  {
-       .nr_cpus        = 1,
-       .nr_devices     = 1,
-       .nr_event_ids   = 16,
-};
-
-static void guest_irq_handler(struct ex_regs *regs)
-{
-       u32 intid = gic_get_and_ack_irq();
-
-       if (intid == IAR_SPURIOUS)
-               return;
-
-       GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
-       gic_set_eoi(intid);
-}
-
-static void guest_setup_its_mappings(void)
-{
-       u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
-       u32 nr_events = test_data.nr_event_ids;
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_cpus = test_data.nr_cpus;
-
-       for (coll_id = 0; coll_id < nr_cpus; coll_id++)
-               its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
-
-       /* Round-robin the LPIs to all of the vCPUs in the VM */
-       coll_id = 0;
-       for (device_id = 0; device_id < nr_devices; device_id++) {
-               vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
-
-               its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
-                                 itt_base, SZ_64K, true);
-
-               for (event_id = 0; event_id < nr_events; event_id++) {
-                       its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
-                                          event_id, coll_id, intid++);
-
-                       coll_id = (coll_id + 1) % test_data.nr_cpus;
-               }
-       }
-}
-
-static void guest_invalidate_all_rdists(void)
-{
-       int i;
-
-       for (i = 0; i < test_data.nr_cpus; i++)
-               its_send_invall_cmd(test_data.cmdq_base_va, i);
-}
-
-static void guest_setup_gic(void)
-{
-       static atomic_int nr_cpus_ready = 0;
-       u32 cpuid = guest_get_vcpuid();
-
-       gic_init(GIC_V3, test_data.nr_cpus);
-       gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
-                             test_data.lpi_pend_tables + (cpuid * SZ_64K));
-
-       atomic_fetch_add(&nr_cpus_ready, 1);
-
-       if (cpuid > 0)
-               return;
-
-       while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
-               cpu_relax();
-
-       its_init(test_data.collection_table, SZ_64K,
-                test_data.device_table, SZ_64K,
-                test_data.cmdq_base, SZ_64K);
-
-       guest_setup_its_mappings();
-       guest_invalidate_all_rdists();
-}
-
-static void guest_code(size_t nr_lpis)
-{
-       guest_setup_gic();
-
-       GUEST_SYNC(0);
-
-       /*
-        * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
-        * never getting the stop signal.
-        */
-       while (!READ_ONCE(test_data.request_vcpus_stop))
-               cpu_relax();
-
-       GUEST_DONE();
-}
-
-static void setup_memslot(void)
-{
-       size_t pages;
-       size_t sz;
-
-       /*
-        * For the ITS:
-        *  - A single level device table
-        *  - A single level collection table
-        *  - The command queue
-        *  - An ITT for each device
-        */
-       sz = (3 + test_data.nr_devices) * SZ_64K;
-
-       /*
-        * For the redistributors:
-        *  - A shared LPI configuration table
-        *  - An LPI pending table for each vCPU
-        */
-       sz += (1 + test_data.nr_cpus) * SZ_64K;
-
-       pages = sz / vm->page_size;
-       gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
-                                   TEST_MEMSLOT_INDEX, pages, 0);
-}
-
-#define LPI_PROP_DEFAULT_PRIO  0xa0
-
-static void configure_lpis(void)
-{
-       size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
-       u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
-       size_t i;
-
-       for (i = 0; i < nr_lpis; i++) {
-               tbl[i] = LPI_PROP_DEFAULT_PRIO |
-                        LPI_PROP_GROUP1 |
-                        LPI_PROP_ENABLED;
-       }
-}
-
-static void setup_test_data(void)
-{
-       size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_cpus = test_data.nr_cpus;
-       vm_paddr_t cmdq_base;
-
-       test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                   gpa_base,
-                                                   TEST_MEMSLOT_INDEX);
-
-       test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                       gpa_base,
-                                                       TEST_MEMSLOT_INDEX);
-
-       cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
-                                      TEST_MEMSLOT_INDEX);
-       virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
-       test_data.cmdq_base = cmdq_base;
-       test_data.cmdq_base_va = (void *)cmdq_base;
-
-       test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
-                                                 gpa_base, TEST_MEMSLOT_INDEX);
-
-       test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
-                                                     gpa_base, TEST_MEMSLOT_INDEX);
-       configure_lpis();
-
-       test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
-                                                      gpa_base, TEST_MEMSLOT_INDEX);
-
-       sync_global_to_guest(vm, test_data);
-}
-
-static void setup_gic(void)
-{
-       gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
-       __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
-       its_fd = vgic_its_setup(vm);
-}
-
-static void signal_lpi(u32 device_id, u32 event_id)
-{
-       vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
-
-       struct kvm_msi msi = {
-               .address_lo     = db_addr,
-               .address_hi     = db_addr >> 32,
-               .data           = event_id,
-               .devid          = device_id,
-               .flags          = KVM_MSI_VALID_DEVID,
-       };
-
-       /*
-        * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
-        * which for arm64 implies having a valid translation in the ITS.
-        */
-       TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
-                   "KVM_SIGNAL_MSI ioctl failed");
-}
-
-static pthread_barrier_t test_setup_barrier;
-
-static void *lpi_worker_thread(void *data)
-{
-       u32 device_id = (size_t)data;
-       u32 event_id;
-       size_t i;
-
-       pthread_barrier_wait(&test_setup_barrier);
-
-       for (i = 0; i < nr_iterations; i++)
-               for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
-                       signal_lpi(device_id, event_id);
-
-       return NULL;
-}
-
-static void *vcpu_worker_thread(void *data)
-{
-       struct kvm_vcpu *vcpu = data;
-       struct ucall uc;
-
-       while (true) {
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       pthread_barrier_wait(&test_setup_barrier);
-                       continue;
-               case UCALL_DONE:
-                       return NULL;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall: %lu", uc.cmd);
-               }
-       }
-
-       return NULL;
-}
-
-static void report_stats(struct timespec delta)
-{
-       double nr_lpis;
-       double time;
-
-       nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
-
-       time = delta.tv_sec;
-       time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
-
-       pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
-}
-
-static void run_test(void)
-{
-       u32 nr_devices = test_data.nr_devices;
-       u32 nr_vcpus = test_data.nr_cpus;
-       pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
-       pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
-       struct timespec start, delta;
-       size_t i;
-
-       TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
-
-       pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
-
-       for (i = 0; i < nr_devices; i++)
-               pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
-
-       pthread_barrier_wait(&test_setup_barrier);
-
-       clock_gettime(CLOCK_MONOTONIC, &start);
-
-       for (i = 0; i < nr_devices; i++)
-               pthread_join(lpi_threads[i], NULL);
-
-       delta = timespec_elapsed(start);
-       write_guest_global(vm, test_data.request_vcpus_stop, true);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_join(vcpu_threads[i], NULL);
-
-       report_stats(delta);
-}
-
-static void setup_vm(void)
-{
-       int i;
-
-       vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
-       TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
-
-       vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
-
-       vm_init_descriptor_tables(vm);
-       for (i = 0; i < test_data.nr_cpus; i++)
-               vcpu_init_descriptor_tables(vcpus[i]);
-
-       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
-
-       setup_memslot();
-
-       setup_gic();
-
-       setup_test_data();
-}
-
-static void destroy_vm(void)
-{
-       close(its_fd);
-       close(gic_fd);
-       kvm_vm_free(vm);
-       free(vcpus);
-}
-
-static void pr_usage(const char *name)
-{
-       pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
-       pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
-       pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
-       pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
-       pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
-}
-
-int main(int argc, char **argv)
-{
-       u32 nr_threads;
-       int c;
-
-       while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
-               switch (c) {
-               case 'v':
-                       test_data.nr_cpus = atoi(optarg);
-                       break;
-               case 'd':
-                       test_data.nr_devices = atoi(optarg);
-                       break;
-               case 'e':
-                       test_data.nr_event_ids = atoi(optarg);
-                       break;
-               case 'i':
-                       nr_iterations = strtoul(optarg, NULL, 0);
-                       break;
-               case 'h':
-               default:
-                       pr_usage(argv[0]);
-                       return 1;
-               }
-       }
-
-       nr_threads = test_data.nr_cpus + test_data.nr_devices;
-       if (nr_threads > get_nprocs())
-               pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
-                        nr_threads, get_nprocs());
-
-       setup_vm();
-
-       run_test();
-
-       destroy_vm();
-
-       return 0;
-}
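
The injection path in this stress test is KVM_SIGNAL_MSI with KVM_MSI_VALID_DEVID, which stands in for a device writing its event ID to the ITS doorbell (GITS_TRANSLATER); the ITS then resolves the (DeviceID, EventID) pair to the LPI and collection that guest_setup_its_mappings() programmed. The rate printed by report_stats() is simply total MSIs divided by elapsed time; a back-of-the-envelope sketch with the default parameters and a made-up two-second run:

#include <stdio.h>

int main(void)
{
	/* Defaults: 1 device, 16 event IDs per device, 1000 iterations. */
	double nr_lpis = 1.0 * 16 * 1000;
	double secs = 2.0;		/* hypothetical elapsed time */

	printf("Rate: %.2f LPIs/sec\n", nr_lpis / secs);	/* prints 8000.00 */
	return 0;
}
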
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
deleted file mode 100644 (file)
index f16b3b2..0000000
+++ /dev/null
@@ -1,648 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vpmu_counter_access - Test vPMU event counter access
- *
- * Copyright (c) 2023 Google LLC.
- *
- * This test checks if the guest can see the same number of PMU event
- * counters (PMCR_EL0.N) that userspace sets, if the guest can access
- * those counters, and if the guest is prevented from accessing any
- * other counters.
- * It also checks if userspace accesses to the PMU registers honor the
- * PMCR.N value that's set for the guest.
- * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
- */
-#include <kvm_util.h>
-#include <processor.h>
-#include <test_util.h>
-#include <vgic.h>
-#include <perf/arm_pmuv3.h>
-#include <linux/bitfield.h>
-
-/* The max number of the PMU event counters (excluding the cycle counter) */
-#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
-
-/* The cycle counter bit position that's common among the PMU registers */
-#define ARMV8_PMU_CYCLE_IDX            31
-
-struct vpmu_vm {
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       int gic_fd;
-};
-
-static struct vpmu_vm vpmu_vm;
-
-struct pmreg_sets {
-       uint64_t set_reg_id;
-       uint64_t clr_reg_id;
-};
-
-#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
-
-static uint64_t get_pmcr_n(uint64_t pmcr)
-{
-       return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
-}
-
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
-{
-       u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
-static uint64_t get_counters_mask(uint64_t n)
-{
-       uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
-
-       if (n)
-               mask |= GENMASK(n - 1, 0);
-       return mask;
-}
-
-/* Read PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline unsigned long read_sel_evcntr(int sel)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       return read_sysreg(pmxevcntr_el0);
-}
-
-/* Write PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
-static inline void write_sel_evcntr(int sel, unsigned long val)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       write_sysreg(val, pmxevcntr_el0);
-       isb();
-}
-
-/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline unsigned long read_sel_evtyper(int sel)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       return read_sysreg(pmxevtyper_el0);
-}
-
-/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
-static inline void write_sel_evtyper(int sel, unsigned long val)
-{
-       write_sysreg(sel, pmselr_el0);
-       isb();
-       write_sysreg(val, pmxevtyper_el0);
-       isb();
-}
-
-static void pmu_disable_reset(void)
-{
-       uint64_t pmcr = read_sysreg(pmcr_el0);
-
-       /* Reset all counters, disabling them */
-       pmcr &= ~ARMV8_PMU_PMCR_E;
-       write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
-       isb();
-}
-
-#define RETURN_READ_PMEVCNTRN(n) \
-       return read_sysreg(pmevcntr##n##_el0)
-static unsigned long read_pmevcntrn(int n)
-{
-       PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
-       return 0;
-}
-
-#define WRITE_PMEVCNTRN(n) \
-       write_sysreg(val, pmevcntr##n##_el0)
-static void write_pmevcntrn(int n, unsigned long val)
-{
-       PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
-       isb();
-}
-
-#define READ_PMEVTYPERN(n) \
-       return read_sysreg(pmevtyper##n##_el0)
-static unsigned long read_pmevtypern(int n)
-{
-       PMEVN_SWITCH(n, READ_PMEVTYPERN);
-       return 0;
-}
-
-#define WRITE_PMEVTYPERN(n) \
-       write_sysreg(val, pmevtyper##n##_el0)
-static void write_pmevtypern(int n, unsigned long val)
-{
-       PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
-       isb();
-}
-
-/*
- * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
- * accessors that test cases will use. Each of the accessors either
- * directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
- * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
- * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
- *
- * This is used to test that combinations of those accessors provide
- * consistent behavior.
- */
-struct pmc_accessor {
-       /* A function to be used to read PMEVCNTR<n>_EL0 */
-       unsigned long   (*read_cntr)(int idx);
-       /* A function to be used to write PMEVCNTR<n>_EL0 */
-       void            (*write_cntr)(int idx, unsigned long val);
-       /* A function to be used to read PMEVTYPER<n>_EL0 */
-       unsigned long   (*read_typer)(int idx);
-       /* A function to be used to write PMEVTYPER<n>_EL0 */
-       void            (*write_typer)(int idx, unsigned long val);
-};
-
-struct pmc_accessor pmc_accessors[] = {
-       /* test with all direct accesses */
-       { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
-       /* test with all indirect accesses */
-       { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
-       /* read with direct accesses, and write with indirect accesses */
-       { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
-       /* read with indirect accesses, and write with direct accesses */
-       { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
-};
-
-/*
- * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
- * assuming that the pointer is one of the entries in pmc_accessors[].
- */
-#define PMC_ACC_TO_IDX(acc)    (acc - &pmc_accessors[0])
-
-#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected)                    \
-{                                                                               \
-       uint64_t _tval = read_sysreg(regname);                                   \
-                                                                                \
-       if (set_expected)                                                        \
-               __GUEST_ASSERT((_tval & mask),                                   \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
-                               _tval, mask, set_expected);                      \
-       else                                                                     \
-               __GUEST_ASSERT(!(_tval & mask),                                  \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
-                               _tval, mask, set_expected);                      \
-}
-
-/*
- * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
- * are set or cleared as specified in @set_expected.
- */
-static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
-{
-       GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
-       GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
-}
-
-/*
- * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
- * to the specified counter (@pmc_idx) can be read/written as expected.
- * When @set_op is true, it tries to set the bit for the counter in
- * those registers by writing the SET registers (the bit won't be set
- * if the counter is not implemented though).
- * Otherwise, it tries to clear the bits in the registers by writing
- * the CLR registers.
- * Then, it checks if the values indicated in the registers are as expected.
- */
-static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
-{
-       uint64_t pmcr_n, test_bit = BIT(pmc_idx);
-       bool set_expected = false;
-
-       if (set_op) {
-               write_sysreg(test_bit, pmcntenset_el0);
-               write_sysreg(test_bit, pmintenset_el1);
-               write_sysreg(test_bit, pmovsset_el0);
-
-               /* The bit will be set only if the counter is implemented */
-               pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
-               set_expected = (pmc_idx < pmcr_n) ? true : false;
-       } else {
-               write_sysreg(test_bit, pmcntenclr_el0);
-               write_sysreg(test_bit, pmintenclr_el1);
-               write_sysreg(test_bit, pmovsclr_el0);
-       }
-       check_bitmap_pmu_regs(test_bit, set_expected);
-}
-
-/*
- * Tests for reading/writing registers for the (implemented) event counter
- * specified by @pmc_idx.
- */
-static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
-       uint64_t write_data, read_data;
-
-       /* Disable all PMCs and reset all PMCs to zero. */
-       pmu_disable_reset();
-
-       /*
-        * Tests for reading/writing the {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers.
-        */
-
-       /* Make sure that the bits in those registers are set to 0 */
-       test_bitmap_pmu_regs(pmc_idx, false);
-       /* Test if setting the bit in those registers works */
-       test_bitmap_pmu_regs(pmc_idx, true);
-       /* Test if clearing the bit in those registers works */
-       test_bitmap_pmu_regs(pmc_idx, false);
-
-       /*
-        * Tests for reading/writing the event type register.
-        */
-
-       /*
-        * Set the event type register to an arbitrary value just to test
-        * reading/writing the register.
-        * The Arm ARM says that for events 0x0000 to 0x003F, the value
-        * indicated in the PMEVTYPER<n>_EL0.evtCount field is the value
-        * that was written to the field, even when the specified event
-        * is not supported.
-        */
-       write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-       acc->write_typer(pmc_idx, write_data);
-       read_data = acc->read_typer(pmc_idx);
-       __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-
-       /*
-        * Tests for reading/writing the event count register.
-        */
-
-       read_data = acc->read_cntr(pmc_idx);
-
-       /* The count value must be 0, as it is disabled and reset */
-       __GUEST_ASSERT(read_data == 0,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
-
-       write_data = read_data + pmc_idx + 0x12345;
-       acc->write_cntr(pmc_idx, write_data);
-       read_data = acc->read_cntr(pmc_idx);
-       __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
-                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
-}
-
-#define INVALID_EC     (-1ul)
-uint64_t expected_ec = INVALID_EC;
-
-static void guest_sync_handler(struct ex_regs *regs)
-{
-       uint64_t esr, ec;
-
-       esr = read_sysreg(esr_el1);
-       ec = ESR_ELx_EC(esr);
-
-       __GUEST_ASSERT(expected_ec == ec,
-                       "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
-                       regs->pc, esr, ec, expected_ec);
-
-       /* skip the trapping instruction */
-       regs->pc += 4;
-
-       /* Use INVALID_EC to indicate an exception occurred */
-       expected_ec = INVALID_EC;
-}
-
-/*
- * Run the given operation that should trigger an exception with the
- * given exception class. The exception handler (guest_sync_handler)
- * resets expected_ec to INVALID_EC and skips the instruction that
- * trapped.
- */
-#define TEST_EXCEPTION(ec, ops)                                \
-({                                                     \
-       GUEST_ASSERT(ec != INVALID_EC);                 \
-       WRITE_ONCE(expected_ec, ec);                    \
-       dsb(ish);                                       \
-       ops;                                            \
-       GUEST_ASSERT(expected_ec == INVALID_EC);        \
-})
-
-/*
- * Tests for reading/writing registers for the unimplemented event counter
- * specified by @pmc_idx (>= PMCR_EL0.N).
- */
-static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
-{
-       /*
-        * Reading/writing the event count/type registers should cause
-        * an UNDEFINED exception.
-        */
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
-       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
-       /*
-        * The bit corresponding to the (unimplemented) counter in
-        * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
-        */
-       test_bitmap_pmu_regs(pmc_idx, 1);
-       test_bitmap_pmu_regs(pmc_idx, 0);
-}
-
-/*
- * The guest is configured with PMUv3 with @expected_pmcr_n number of
- * event counters.
- * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
- * if reading/writing PMU registers for implemented or unimplemented
- * counters works as expected.
- */
-static void guest_code(uint64_t expected_pmcr_n)
-{
-       uint64_t pmcr, pmcr_n, unimp_mask;
-       int i, pmc;
-
-       __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
-                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
-                       expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
-
-       pmcr = read_sysreg(pmcr_el0);
-       pmcr_n = get_pmcr_n(pmcr);
-
-       /* Make sure that PMCR_EL0.N indicates the value userspace set */
-       __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
-                       "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
-                       expected_pmcr_n, pmcr_n);
-
-       /*
-        * Make sure that (RAZ) bits corresponding to unimplemented event
-        * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
-        * to zero.
-        * (NOTE: bits for implemented event counters are reset to UNKNOWN)
-        */
-       unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
-       check_bitmap_pmu_regs(unimp_mask, false);
-
-       /*
-        * Tests for reading/writing PMU registers for implemented counters.
-        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
-        */
-       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
-               for (pmc = 0; pmc < pmcr_n; pmc++)
-                       test_access_pmc_regs(&pmc_accessors[i], pmc);
-       }
-
-       /*
-        * Tests for reading/writing PMU registers for unimplemented counters.
-        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
-        */
-       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
-               for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
-                       test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
-       }
-
-       GUEST_DONE();
-}
-
-/* Create a VM that has one vCPU with PMUv3 configured. */
-static void create_vpmu_vm(void *guest_code)
-{
-       struct kvm_vcpu_init init;
-       uint8_t pmuver, ec;
-       uint64_t dfr0, irq = 23;
-       struct kvm_device_attr irq_attr = {
-               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
-               .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
-               .addr = (uint64_t)&irq,
-       };
-       struct kvm_device_attr init_attr = {
-               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
-               .attr = KVM_ARM_VCPU_PMU_V3_INIT,
-       };
-
-       /* The test creates the vpmu_vm multiple times. Ensure a clean state */
-       memset(&vpmu_vm, 0, sizeof(vpmu_vm));
-
-       vpmu_vm.vm = vm_create(1);
-       vm_init_descriptor_tables(vpmu_vm.vm);
-       for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
-               vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
-                                       guest_sync_handler);
-       }
-
-       /* Create vCPU with PMUv3 */
-       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
-       vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
-       vcpu_init_descriptor_tables(vpmu_vm.vcpu);
-       vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
-       __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
-                      "Failed to create vgic-v3, skipping");
-
-       /* Make sure that PMUv3 support is indicated in the ID register */
-       dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
-       pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
-       TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
-                   pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
-                   "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
-
-       /* Initialize vPMU */
-       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
-       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
-}
-
-static void destroy_vpmu_vm(void)
-{
-       close(vpmu_vm.gic_fd);
-       kvm_vm_free(vpmu_vm.vm);
-}
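The vpmu_vm global manipulated by create_vpmu_vm() and destroy_vpmu_vm() is declared near the top of the file, outside the hunk shown; it is roughly:

/* Global test state assumed by the helpers above (sketch). */
static struct vpmu_vm {
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	int gic_fd;
} vpmu_vm;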
-
-static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
-{
-       struct ucall uc;
-
-       vcpu_args_set(vcpu, 1, pmcr_n);
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               break;
-       }
-}
-
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
-{
-       struct kvm_vcpu *vcpu;
-       uint64_t pmcr, pmcr_orig;
-
-       create_vpmu_vm(guest_code);
-       vcpu = vpmu_vm.vcpu;
-
-       pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-       pmcr = pmcr_orig;
-
-       /*
-        * Attempt to set PMCR.N. If the requested value is larger than the
-        * host's, KVM should leave the field unmodified, but the ioctl
-        * should still succeed.
-        */
-       set_pmcr_n(&pmcr, pmcr_n);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
-       pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-
-       if (expect_fail)
-               TEST_ASSERT(pmcr_orig == pmcr,
-                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
-                           pmcr, pmcr_n);
-       else
-               TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
-                           "Failed to update PMCR.N to %lu (received: %lu)",
-                           pmcr_n, get_pmcr_n(pmcr));
-}
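get_pmcr_n() and set_pmcr_n(), used above, are small helpers defined earlier in the file that extract and update the PMCR_EL0.N field. A sketch, assuming ARMV8_PMU_PMCR_N is the field mask and that <linux/bitfield.h> is available (FIELD_GET() is already used elsewhere in this file):

static uint64_t get_pmcr_n(uint64_t pmcr)
{
	return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}

static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
{
	*pmcr &= ~(uint64_t)ARMV8_PMU_PMCR_N;
	*pmcr |= FIELD_PREP(ARMV8_PMU_PMCR_N, pmcr_n);
}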
-
-/*
- * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
- * and run the test.
- */
-static void run_access_test(uint64_t pmcr_n)
-{
-       uint64_t sp;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vcpu_init init;
-
-       pr_debug("Test with pmcr_n %lu\n", pmcr_n);
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
-       vcpu = vpmu_vm.vcpu;
-
-       /* Save the initial sp to restore it later when re-running the guest */
-       sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
-
-       run_vcpu(vcpu, pmcr_n);
-
-       /*
-        * Reset and re-initialize the vCPU, and run the guest code again to
-        * check if PMCR_EL0.N is preserved.
-        */
-       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
-       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
-       aarch64_vcpu_setup(vcpu, &init);
-       vcpu_init_descriptor_tables(vcpu);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-
-       run_vcpu(vcpu, pmcr_n);
-
-       destroy_vpmu_vm();
-}
-
-static struct pmreg_sets validity_check_reg_sets[] = {
-       PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
-       PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
-       PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
-};
-
-/*
- * Create a VM, and check if KVM handles the userspace accesses of
- * the PMU register sets in @validity_check_reg_sets[] correctly.
- */
-static void run_pmregs_validity_test(uint64_t pmcr_n)
-{
-       int i;
-       struct kvm_vcpu *vcpu;
-       uint64_t set_reg_id, clr_reg_id, reg_val;
-       uint64_t valid_counters_mask, max_counters_mask;
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
-       vcpu = vpmu_vm.vcpu;
-
-       valid_counters_mask = get_counters_mask(pmcr_n);
-       max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
-
-       for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
-               set_reg_id = validity_check_reg_sets[i].set_reg_id;
-               clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
-
-               /*
-                * Test if the 'set' and 'clr' variants of the registers
-                * are initialized based on the number of valid counters.
-                */
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
-
-               /*
-                * Using the 'set' variant, force-set the register to the
-                * max number of possible counters and test if KVM discards
-                * the bits for unimplemented counters as it should.
-                */
-               vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
-
-               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
-               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
-                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
-                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
-       }
-
-       destroy_vpmu_vm();
-}
-
-/*
- * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
- * the vCPU to @pmcr_n, which is larger than the host value.
- * The write should have no effect, as @pmcr_n is too big for the vCPU.
- */
-static void run_error_test(uint64_t pmcr_n)
-{
-       pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
-
-       test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
-       destroy_vpmu_vm();
-}
-
-/*
- * Return the default number of implemented PMU event counters excluding
- * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
- */
-static uint64_t get_pmcr_n_limit(void)
-{
-       uint64_t pmcr;
-
-       create_vpmu_vm(guest_code);
-       pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
-       destroy_vpmu_vm();
-       return get_pmcr_n(pmcr);
-}
-
-int main(void)
-{
-       uint64_t i, pmcr_n;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
-
-       pmcr_n = get_pmcr_n_limit();
-       for (i = 0; i <= pmcr_n; i++) {
-               run_access_test(i);
-               run_pmregs_validity_test(i);
-       }
-
-       for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
-               run_error_test(i);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
new file mode 100644 (file)
index 0000000..447d61c
--- /dev/null
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg)      GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+       GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+       GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+       GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+       GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+       KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+       KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+       KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+               uint64_t reg_id = raz_wi_reg_ids[i];
+               uint64_t val;
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+
+               /*
+                * Expect the ioctl to succeed with no effect on the register
+                * value.
+                */
+               vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+       }
+}
+
+static uint64_t raz_invariant_reg_ids[] = {
+       KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+       KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+       KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+       int i, r;
+
+       for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+               uint64_t reg_id = raz_invariant_reg_ids[i];
+               uint64_t val;
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+
+               r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+               TEST_ASSERT(r < 0 && errno == EINVAL,
+                           "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+               val = vcpu_get_reg(vcpu, reg_id);
+               TEST_ASSERT_EQ(val, 0);
+       }
+}
+
+
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+       uint64_t val, el0;
+
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+       return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+       test_user_raz_wi(vcpu);
+       test_user_raz_invariant(vcpu);
+       test_guest_raz(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
new file mode 100644 (file)
index 0000000..eeba1cc
--- /dev/null
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#include "arch_timer.h"
+#include "delay.h"
+#include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+#include "vgic.h"
+
+enum guest_stage {
+       GUEST_STAGE_VTIMER_CVAL = 1,
+       GUEST_STAGE_VTIMER_TVAL,
+       GUEST_STAGE_PTIMER_CVAL,
+       GUEST_STAGE_PTIMER_TVAL,
+       GUEST_STAGE_MAX,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+       switch (shared_data->guest_stage) {
+       case GUEST_STAGE_VTIMER_CVAL:
+               timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(VIRTUAL);
+               timer_set_ctl(VIRTUAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_VTIMER_TVAL:
+               timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(VIRTUAL);
+               timer_set_ctl(VIRTUAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_PTIMER_CVAL:
+               timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(PHYSICAL);
+               timer_set_ctl(PHYSICAL, CTL_ENABLE);
+               break;
+       case GUEST_STAGE_PTIMER_TVAL:
+               timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cntct(PHYSICAL);
+               timer_set_ctl(PHYSICAL, CTL_ENABLE);
+               break;
+       default:
+               GUEST_ASSERT(0);
+       }
+}
+
+static void guest_validate_irq(unsigned int intid,
+                               struct test_vcpu_shared_data *shared_data)
+{
+       enum guest_stage stage = shared_data->guest_stage;
+       uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+       unsigned long xctl = 0;
+       unsigned int timer_irq = 0;
+       unsigned int accessor;
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       switch (stage) {
+       case GUEST_STAGE_VTIMER_CVAL:
+       case GUEST_STAGE_VTIMER_TVAL:
+               accessor = VIRTUAL;
+               timer_irq = vtimer_irq;
+               break;
+       case GUEST_STAGE_PTIMER_CVAL:
+       case GUEST_STAGE_PTIMER_TVAL:
+               accessor = PHYSICAL;
+               timer_irq = ptimer_irq;
+               break;
+       default:
+               GUEST_ASSERT(0);
+               return;
+       }
+
+       xctl = timer_get_ctl(accessor);
+       if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+               return;
+
+       timer_set_ctl(accessor, CTL_IMASK);
+       xcnt = timer_get_cntct(accessor);
+       cval = timer_get_cval(accessor);
+
+       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+       /* Make sure we are dealing with the correct timer IRQ */
+       GUEST_ASSERT_EQ(intid, timer_irq);
+
+       /* Basic 'timer condition met' check */
+       __GUEST_ASSERT(xcnt >= cval,
+                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
+                      xcnt, cval, xcnt_diff_us);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
+
+       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
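The cycle/time conversions used above (cycles_to_usec(), msecs_to_usecs(), etc.) come from the selftest headers (arch_timer.h and friends) and are based on the system counter frequency reported by CNTFRQ_EL0. An illustrative sketch (the helper name here is hypothetical; USEC_PER_SEC is assumed to be defined):

/* Illustrative only; the real helper lives in the arch_timer.h header. */
static inline uint64_t approx_cycles_to_usec(uint64_t cycles)
{
	/* CNTFRQ_EL0 reports the system counter frequency in Hz. */
	return (cycles * USEC_PER_SEC) / read_sysreg(cntfrq_el0);
}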
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       unsigned int intid = gic_get_and_ack_irq();
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       guest_validate_irq(intid, shared_data);
+
+       gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+                               enum guest_stage stage)
+{
+       uint32_t irq_iter, config_iter;
+
+       shared_data->guest_stage = stage;
+       shared_data->nr_iter = 0;
+
+       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+               /* Setup the next interrupt */
+               guest_configure_timer_action(shared_data);
+
+               /* Setup a timeout for the interrupt to arrive */
+               udelay(msecs_to_usecs(test_args.timer_period_ms) +
+                       test_args.timer_err_margin_us);
+
+               irq_iter = READ_ONCE(shared_data->nr_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+                               "  Guest timer interrupt was not triggered within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
+       }
+}
+
+static void guest_code(void)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       local_irq_disable();
+
+       gic_init(GIC_V3, test_args.nr_vcpus);
+
+       timer_set_ctl(VIRTUAL, CTL_IMASK);
+       timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+       gic_irq_enable(vtimer_irq);
+       gic_irq_enable(ptimer_irq);
+       local_irq_enable();
+
+       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+       guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+       GUEST_DONE();
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+       /* The timer intid should be the same for all vCPUs, so query only vCPU-0 */
+       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+       vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+       sync_global_to_guest(vm, ptimer_irq);
+       sync_global_to_guest(vm, vtimer_irq);
+
+       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static int gic_fd;
+
+struct kvm_vm *test_vm_create(void)
+{
+       struct kvm_vm *vm;
+       unsigned int i;
+       int nr_vcpus = test_args.nr_vcpus;
+
+       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+       vm_init_descriptor_tables(vm);
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+       if (!test_args.reserved) {
+               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+                       struct kvm_arm_counter_offset offset = {
+                               .counter_offset = test_args.counter_offset,
+                               .reserved = 0,
+                       };
+                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+               } else
+                       TEST_FAIL("no support for global offset");
+       }
+
+       for (i = 0; i < nr_vcpus; i++)
+               vcpu_init_descriptor_tables(vcpus[i]);
+
+       test_init_timer_irq(vm);
+       gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
+       /* Make all the test's cmdline args visible to the guest */
+       sync_global_to_guest(vm, test_args);
+
+       return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+       close(gic_fd);
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
new file mode 100644 (file)
index 0000000..a36a7e2
--- /dev/null
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past.
+ * - moving counters ahead and behind pending timers.
+ * - reprogramming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* How long to wait before declaring that no IRQ will arrive. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+       atomic_t handled;
+       atomic_t spurious;
+} shared_data;
+
+struct test_args {
+       /* Virtual or physical timer and counter tests. */
+       enum arch_timer timer;
+       /* Delay used for most timer tests. */
+       uint64_t wait_ms;
+       /* Delay used in the test_long_timer_delays test. */
+       uint64_t long_wait_ms;
+       /* Number of iterations. */
+       int iterations;
+       /* Whether to test the physical timer. */
+       bool test_physical;
+       /* Whether to test the virtual timer. */
+       bool test_virtual;
+};
+
+struct test_args test_args = {
+       .wait_ms = WAIT_TEST_MS,
+       .long_wait_ms = LONG_WAIT_TEST_MS,
+       .iterations = NR_TEST_ITERS_DEF,
+       .test_physical = true,
+       .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+       SET_COUNTER_VALUE,
+       USERSPACE_USLEEP,
+       USERSPACE_SCHED_YIELD,
+       USERSPACE_MIGRATE_SELF,
+       NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+       sleep_poll,
+       sleep_sched_poll,
+       sleep_migrate,
+       sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+       wait_for_non_spurious_irq,
+       wait_poll_for_irq,
+       wait_sched_poll_for_irq,
+       wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+       TIMER_CVAL,
+       TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+       int h = atomic_read(&shared_data.handled);
+
+       __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+       GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+       userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+       GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+       GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       unsigned int intid = gic_get_and_ack_irq();
+       enum arch_timer timer;
+       uint64_t cnt, cval;
+       uint32_t ctl;
+       bool timer_condition, istatus;
+
+       if (intid == IAR_SPURIOUS) {
+               atomic_inc(&shared_data.spurious);
+               goto out;
+       }
+
+       if (intid == ptimer_irq)
+               timer = PHYSICAL;
+       else if (intid == vtimer_irq)
+               timer = VIRTUAL;
+       else
+               goto out;
+
+       ctl = timer_get_ctl(timer);
+       cval = timer_get_cval(timer);
+       cnt = timer_get_cntct(timer);
+       timer_condition = cnt >= cval;
+       istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+       GUEST_ASSERT_EQ(timer_condition, istatus);
+
+       /* Disable and mask the timer. */
+       timer_set_ctl(timer, CTL_IMASK);
+
+       atomic_inc(&shared_data.handled);
+
+out:
+       gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+                        uint32_t ctl)
+{
+       atomic_set(&shared_data.handled, 0);
+       atomic_set(&shared_data.spurious, 0);
+       timer_set_cval(timer, cval_cycles);
+       timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+                        uint32_t ctl)
+{
+       atomic_set(&shared_data.handled, 0);
+       atomic_set(&shared_data.spurious, 0);
+       timer_set_ctl(timer, ctl);
+       timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+                        enum timer_view tv)
+{
+       switch (tv) {
+       case TIMER_CVAL:
+               set_cval_irq(timer, xval, ctl);
+               break;
+       case TIMER_TVAL:
+               set_tval_irq(timer, xval, ctl);
+               break;
+       default:
+               GUEST_FAIL("Could not get timer %d", timer);
+       }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+       int h;
+
+       local_irq_disable();
+
+       for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+               wfi();
+               local_irq_enable();
+               isb(); /* handle IRQ */
+               local_irq_disable();
+       }
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+       int h;
+
+       local_irq_disable();
+
+       h = atomic_read(&shared_data.handled);
+
+       local_irq_enable();
+       while (h == atomic_read(&shared_data.handled)) {
+               if (usp_cmd == NO_USERSPACE_CMD)
+                       cpu_relax();
+               else
+                       userspace_cmd(usp_cmd);
+       }
+       local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+       poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+                      enum sync_cmd usp_cmd)
+{
+       uint64_t cycles = usec_to_cycles(usec);
+       /* Whichever timer we are testing with, sleep with the other. */
+       enum arch_timer sleep_timer = 1 - test_timer;
+       uint64_t start = timer_get_cntct(sleep_timer);
+
+       while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+               if (usp_cmd == NO_USERSPACE_CMD)
+                       cpu_relax();
+               else
+                       userspace_cmd(usp_cmd);
+       }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+       guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+       userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+       set_counter(timer, cnt);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+                           enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       local_irq_disable();
+
+       if (reset_state)
+               reset_timer_state(timer, reset_cnt);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+                           irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+                           irq_wait_method_t wm, bool reset_state,
+                           uint64_t reset_cnt)
+{
+       test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+                       reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+                                  uint64_t usec, enum timer_view timer_view,
+                                  sleep_method_t guest_sleep)
+{
+       local_irq_disable();
+
+       set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+       guest_sleep(timer, usec);
+
+       local_irq_enable();
+       isb();
+
+       /* Assume success (no IRQ) after waiting usec microseconds */
+       assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+                            uint64_t usec, sleep_method_t wm)
+{
+       test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+                            sleep_method_t wm)
+{
+       /* tval will be cast to an int32_t in test_xval_check_no_irq */
+       test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+       reset_timer_state(timer, DEF_CNT);
+       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+       /* Unmask the timer, and then get an IRQ. */
+       local_irq_disable();
+       timer_set_ctl(timer, CTL_ENABLE);
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wait_for_non_spurious_irq();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+       reset_timer_state(timer, DEF_CNT);
+
+       /* Local IRQs are not masked at this point. */
+
+       set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+       sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+       assert_irqs_handled(0);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+                                            irq_wait_method_t wm, int num)
+{
+       int i;
+
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       set_tval_irq(timer, 0, CTL_ENABLE);
+
+       for (i = 1; i <= num; i++) {
+               /* This method re-enables IRQs to handle the one we're looking for. */
+               wm();
+
+               /*
+                * The IRQ handler masked and disabled the timer.
+                * Enable and unmask it again.
+                */
+               timer_set_ctl(timer, CTL_ENABLE);
+
+               assert_irqs_handled(i);
+       }
+
+       local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+               test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that the timer fires. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+                                    int32_t delta_1_ms, int32_t delta_2_ms)
+{
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       /* Program the timer to DEF_CNT + delta_1_ms. */
+       set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+       /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+       timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+       GUEST_ASSERT(timer_get_cntct(timer) >=
+                    DEF_CNT + msec_to_cycles(delta_2_ms));
+
+       local_irq_enable();
+       assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+       int i;
+       uint64_t base_wait = test_args.wait_ms;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               /*
+                * Ensure reprogramming works whether going from a
+                * longer time to a shorter or vice versa.
+                */
+               test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+                                        base_wait);
+               test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+                                        2 * base_wait);
+       }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+       int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+       }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+       reset_timer_state(timer, DEF_CNT);
+
+       local_irq_disable();
+
+       /* cval in the past */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) -
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+       /* tval in the past */
+       timer_set_tval(timer, -1);
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+       /*
+        * tval larger than TVAL_MAX. This requires programming with
+        * timer_set_cval instead so the value is expressible.
+        */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) + TVAL_MAX +
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+       /*
+        * tval larger than 2 * TVAL_MAX.
+        * A delta of twice TVAL_MAX wraps the 32-bit TVAL all the way around.
+        */
+       timer_set_cval(timer,
+                      timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+                      msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_tval(timer) <=
+                      msec_to_cycles(test_args.wait_ms));
+
+       /* Negative tval that rolls over from 0. */
+       set_counter(timer, msec_to_cycles(1));
+       timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+       if (use_sched)
+               userspace_migrate_vcpu();
+       GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+       /* tval should keep down-counting from 0 to -1. */
+       timer_set_tval(timer, 0);
+       sleep_poll(timer, 1);
+       GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+       local_irq_enable();
+
+       /* Mask and disable any pending timer. */
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+       timers_sanity_checks(timer, false);
+       /* Check how KVM saves/restores these edge-case values. */
+       timers_sanity_checks(timer, true);
+}
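The checks above exercise the architectural relationship between CVAL and TVAL; as a summary of the arithmetic involved (per the Arm ARM, not specific to this test):

/*
 * Writing TVAL = t sets CVAL = CNT + sign_extend32(t); reading TVAL
 * returns (int32_t)(CVAL - CNT). For example, writing a negative TVAL
 * while CNT is very small makes CVAL wrap to just below CVAL_MAX,
 * which is what the "negative tval that rolls over from 0" check
 * above relies on.
 */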
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+       local_irq_disable();
+       reset_timer_state(timer, DEF_CNT);
+
+       set_cval_irq(timer,
+                    (uint64_t) TVAL_MAX +
+                    msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+       set_counter(timer, TVAL_MAX);
+
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+       uint64_t cval;
+       int i;
+
+       /*
+        * Test that the system is not implementing cval in terms of
+        * tval.  If that was the case, setting a cval to "cval = now
+        * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+        * wait_ms", and the timer would fire immediately. Test that it
+        * doesn't.
+        */
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               reset_timer_state(timer, DEF_CNT);
+               cval = timer_get_cntct(timer) + TVAL_MAX +
+                       msec_to_cycles(test_args.wait_ms);
+               test_cval_no_irq(timer, cval,
+                                msecs_to_usecs(test_args.wait_ms) +
+                                TIMEOUT_NO_IRQ_US, sleep_method[i]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               /* Get the IRQ by moving the counter forward. */
+               test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+       }
+}
+
+/*
+ * Template function used by the test_move_counters_* tests.  It sets the
+ * counter to cnt_1, programs the [c|t]val, sets the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+                                   uint64_t xval, uint64_t cnt_2,
+                                   irq_wait_method_t wm, enum timer_view tv)
+{
+       local_irq_disable();
+
+       set_counter(timer, cnt_1);
+       timer_set_ctl(timer, CTL_IMASK);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+       set_counter(timer, cnt_2);
+       /* This method re-enables IRQs to handle the one we're looking for. */
+       wm();
+
+       assert_irqs_handled(1);
+       local_irq_enable();
+}
+
+/*
+ * Template function used by the test_move_counters_* tests.  It sets the
+ * counter to cnt_1, programs the [c|t]val, sets the counter to cnt_2, and
+ * then sleeps, asserting that no IRQ fires.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, uint64_t xval,
+                                          uint64_t cnt_2,
+                                          sleep_method_t guest_sleep,
+                                          enum timer_view tv)
+{
+       local_irq_disable();
+
+       set_counter(timer, cnt_1);
+       timer_set_ctl(timer, CTL_IMASK);
+
+       set_xval_irq(timer, xval, CTL_ENABLE, tv);
+       set_counter(timer, cnt_2);
+       guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+       local_irq_enable();
+       isb();
+
+       /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+       assert_irqs_handled(0);
+       timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+                                   int32_t tval, uint64_t cnt_2,
+                                   irq_wait_method_t wm)
+{
+       test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+                                   uint64_t cval, uint64_t cnt_2,
+                                   irq_wait_method_t wm)
+{
+       test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, int32_t tval,
+                                          uint64_t cnt_2, sleep_method_t wm)
+{
+       test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+                                      TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+                                          uint64_t cnt_1, uint64_t cval,
+                                          uint64_t cnt_2, sleep_method_t wm)
+{
+       test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+                                      TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+       int i;
+       int32_t tval;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+               test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+               /* Move counter ahead of negative tval. */
+               test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+               test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+               tval = TVAL_MAX;
+               test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+                                       wm);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+       }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+                                              sm);
+               test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+       }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+       int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+       uint64_t cval;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               /* Set a timer wait_ms in the past. */
+               cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+               /* Set a timer to counter=0 (in the past) */
+               test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+               /* Set a timer for tval=0 (now) */
+               test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+               /* Set a timer to as far in the past as possible */
+               test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+       }
+
+       /*
+        * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+        * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+        */
+       for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+               sleep_method_t sm = sleep_method[i];
+
+               set_counter(timer, msec_to_cycles(test_args.wait_ms));
+               test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+       }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+       int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+       uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+               irq_wait_method_t wm = irq_wait_method[i];
+
+               test_timer_cval(timer, cval, wm, true, DEF_CNT);
+               test_timer_tval(timer, tval, wm, true, DEF_CNT);
+       }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+       test_basic_functionality(timer);
+       test_timers_sanity_checks(timer);
+
+       test_timers_above_tval_max(timer);
+       test_timers_in_the_past(timer);
+
+       test_move_counters_ahead_of_timers(timer);
+       test_move_counters_behind_timers(timer);
+       test_reprogram_timers(timer);
+
+       test_timers_fired_multiple_times(timer);
+
+       test_timer_control_mask_then_unmask(timer);
+       test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+       int i;
+
+       local_irq_disable();
+
+       gic_init(GIC_V3, 1);
+
+       timer_set_ctl(VIRTUAL, CTL_IMASK);
+       timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+       gic_irq_enable(vtimer_irq);
+       gic_irq_enable(ptimer_irq);
+       local_irq_enable();
+
+       for (i = 0; i < test_args.iterations; i++) {
+               GUEST_SYNC(i);
+               guest_run_iteration(timer);
+       }
+
+       test_long_timer_delays(timer);
+       GUEST_DONE();
+}
+
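+/*
+ * Return the next physical CPU in the task's affinity mask (wrapping around
+ * CPU_SETSIZE) so the vCPU thread can be bounced between pCPUs.
+ */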
+static uint32_t next_pcpu(void)
+{
+       uint32_t max = get_nprocs();
+       uint32_t cur = sched_getcpu();
+       uint32_t next = cur;
+       cpu_set_t cpuset;
+
+       TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+       sched_getaffinity(0, sizeof(cpuset), &cpuset);
+
+       do {
+               next = (next + 1) % CPU_SETSIZE;
+       } while (!CPU_ISSET(next, &cpuset));
+
+       return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+       int ret;
+       cpu_set_t cpuset;
+       pthread_t thread;
+
+       thread = pthread_self();
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(new_pcpu, &cpuset);
+
+       pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+       ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+       TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+                   new_pcpu, ret);
+}
+
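+/* Set the guest's physical or virtual counter via the KVM timer registers. */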
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+                          enum arch_timer timer)
+{
+       if (timer == PHYSICAL)
+               vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+       else
+               vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
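+/*
+ * Handle a GUEST_SYNC request: the guest passes a command and arguments in
+ * the ucall so userspace can set the counter, sleep, yield, or migrate.
+ */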
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+       enum sync_cmd cmd = uc->args[1];
+       uint64_t val = uc->args[2];
+       enum arch_timer timer = uc->args[3];
+
+       switch (cmd) {
+       case SET_COUNTER_VALUE:
+               kvm_set_cntxct(vcpu, val, timer);
+               break;
+       case USERSPACE_USLEEP:
+               usleep(val);
+               break;
+       case USERSPACE_SCHED_YIELD:
+               sched_yield();
+               break;
+       case USERSPACE_MIGRATE_SELF:
+               migrate_self(next_pcpu());
+               break;
+       default:
+               break;
+       }
+}
+
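+/*
+ * Run the vCPU, starting pinned to pCPU 0, until the guest signals that it
+ * is done (or aborts), servicing GUEST_SYNC requests along the way.
+ */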
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       /* Start on CPU 0 */
+       migrate_self(0);
+
+       while (true) {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       handle_sync(vcpu, &uc);
+                       break;
+               case UCALL_DONE:
+                       goto out;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       goto out;
+               default:
+                       TEST_FAIL("Unexpected guest exit\n");
+               }
+       }
+
+ out:
+       return;
+}
+
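+/* Get the vtimer/ptimer IRQs from KVM and propagate them to the guest. */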
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+       vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+                            KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+       sync_global_to_guest(vm, ptimer_irq);
+       sync_global_to_guest(vm, vtimer_irq);
+
+       pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+                          enum arch_timer timer)
+{
+       *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+       vm_init_descriptor_tables(*vm);
+       vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+                                    guest_irq_handler);
+
+       vcpu_init_descriptor_tables(*vcpu);
+       vcpu_args_set(*vcpu, 1, timer);
+
+       test_init_timer_irq(*vm, *vcpu);
+       vgic_v3_setup(*vm, 1, 64);
+       sync_global_to_guest(*vm, test_args);
+}
+
+static void test_print_help(char *name)
+{
+       pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
+               name);
+       pr_info("\t-i: Number of iterations (default: %u)\n",
+               NR_TEST_ITERS_DEF);
+       pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+       pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+               LONG_WAIT_TEST_MS);
+       pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
+               WAIT_TEST_MS);
+       pr_info("\t-p: Test physical timer (default: true)\n");
+       pr_info("\t-v: Test virtual timer (default: true)\n");
+       pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+               switch (opt) {
+               case 'b':
+                       test_args.test_physical = true;
+                       test_args.test_virtual = true;
+                       break;
+               case 'i':
+                       test_args.iterations =
+                           atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'l':
+                       test_args.long_wait_ms =
+                           atoi_positive("Long wait time", optarg);
+                       break;
+               case 'p':
+                       test_args.test_physical = true;
+                       test_args.test_virtual = false;
+                       break;
+               case 'v':
+                       test_args.test_virtual = true;
+                       test_args.test_physical = false;
+                       break;
+               case 'w':
+                       test_args.wait_ms = atoi_positive("Wait time", optarg);
+                       break;
+               case 'h':
+               default:
+                       goto err;
+               }
+       }
+
+       return true;
+
+ err:
+       test_print_help(argv[0]);
+       return false;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Tell stdout not to buffer its content */
+       setbuf(stdout, NULL);
+
+       if (!parse_args(argc, argv))
+               exit(KSFT_SKIP);
+
+       if (test_args.test_virtual) {
+               test_vm_create(&vm, &vcpu, VIRTUAL);
+               test_run(vm, vcpu);
+               kvm_vm_free(vm);
+       }
+
+       if (test_args.test_physical) {
+               test_vm_create(&vm, &vcpu, PHYSICAL);
+               test_run(vm, vcpu);
+               kvm_vm_free(vm);
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
new file mode 100644 (file)
index 0000000..c7fb55c
--- /dev/null
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <linux/bitfield.h>
+
+#define MDSCR_KDE      (1 << 13)
+#define MDSCR_MDE      (1 << 15)
+#define MDSCR_SS       (1 << 0)
+
+#define DBGBCR_LEN8    (0xff << 5)
+#define DBGBCR_EXEC    (0x0 << 3)
+#define DBGBCR_EL1     (0x1 << 1)
+#define DBGBCR_E       (0x1 << 0)
+#define DBGBCR_LBN_SHIFT       16
+#define DBGBCR_BT_SHIFT                20
+#define DBGBCR_BT_ADDR_LINK_CTX        (0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK     (0x3 << DBGBCR_BT_SHIFT)
+
+#define DBGWCR_LEN8    (0xff << 5)
+#define DBGWCR_RD      (0x1 << 3)
+#define DBGWCR_WR      (0x2 << 3)
+#define DBGWCR_EL1     (0x1 << 1)
+#define DBGWCR_E       (0x1 << 0)
+#define DBGWCR_LBN_SHIFT       16
+#define DBGWCR_WT_SHIFT                20
+#define DBGWCR_WT_LINK         (0x1 << DBGWCR_WT_SHIFT)
+
+#define SPSR_D         (1 << 9)
+#define SPSR_SS                (1 << 21)
+
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
+extern unsigned char iter_ss_begin, iter_ss_end;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define  PC(v)  ((uint64_t)&(v))
+
+#define GEN_DEBUG_WRITE_REG(reg_name)                  \
+static void write_##reg_name(int num, uint64_t val)    \
+{                                                      \
+       switch (num) {                                  \
+       case 0:                                         \
+               write_sysreg(val, reg_name##0_el1);     \
+               break;                                  \
+       case 1:                                         \
+               write_sysreg(val, reg_name##1_el1);     \
+               break;                                  \
+       case 2:                                         \
+               write_sysreg(val, reg_name##2_el1);     \
+               break;                                  \
+       case 3:                                         \
+               write_sysreg(val, reg_name##3_el1);     \
+               break;                                  \
+       case 4:                                         \
+               write_sysreg(val, reg_name##4_el1);     \
+               break;                                  \
+       case 5:                                         \
+               write_sysreg(val, reg_name##5_el1);     \
+               break;                                  \
+       case 6:                                         \
+               write_sysreg(val, reg_name##6_el1);     \
+               break;                                  \
+       case 7:                                         \
+               write_sysreg(val, reg_name##7_el1);     \
+               break;                                  \
+       case 8:                                         \
+               write_sysreg(val, reg_name##8_el1);     \
+               break;                                  \
+       case 9:                                         \
+               write_sysreg(val, reg_name##9_el1);     \
+               break;                                  \
+       case 10:                                        \
+               write_sysreg(val, reg_name##10_el1);    \
+               break;                                  \
+       case 11:                                        \
+               write_sysreg(val, reg_name##11_el1);    \
+               break;                                  \
+       case 12:                                        \
+               write_sysreg(val, reg_name##12_el1);    \
+               break;                                  \
+       case 13:                                        \
+               write_sysreg(val, reg_name##13_el1);    \
+               break;                                  \
+       case 14:                                        \
+               write_sysreg(val, reg_name##14_el1);    \
+               break;                                  \
+       case 15:                                        \
+               write_sysreg(val, reg_name##15_el1);    \
+               break;                                  \
+       default:                                        \
+               GUEST_ASSERT(0);                        \
+       }                                               \
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
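+/*
+ * Mask debug exceptions, clear the OS (double) lock, and zero MDSCR_EL1,
+ * CONTEXTIDR_EL1, and all implemented breakpoint/watchpoint registers.
+ */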
+static void reset_debug_state(void)
+{
+       uint8_t brps, wrps, i;
+       uint64_t dfr0;
+
+       asm volatile("msr daifset, #8");
+
+       write_sysreg(0, osdlr_el1);
+       write_sysreg(0, oslar_el1);
+       isb();
+
+       write_sysreg(0, mdscr_el1);
+       write_sysreg(0, contextidr_el1);
+
+       /* Reset all bcr/bvr/wcr/wvr registers */
+       dfr0 = read_sysreg(id_aa64dfr0_el1);
+       brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+       for (i = 0; i <= brps; i++) {
+               write_dbgbcr(i, 0);
+               write_dbgbvr(i, 0);
+       }
+       wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+       for (i = 0; i <= wrps; i++) {
+               write_dbgwcr(i, 0);
+               write_dbgwvr(i, 0);
+       }
+
+       isb();
+}
+
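+/* Set the OS Lock and verify that OSLSR_EL1.OSLK reads back as set. */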
+static void enable_os_lock(void)
+{
+       write_sysreg(1, oslar_el1);
+       isb();
+
+       GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
+static void enable_monitor_debug_exceptions(void)
+{
+       uint32_t mdscr;
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+       write_sysreg(mdscr, mdscr_el1);
+       isb();
+}
+
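+/*
+ * Arm watchpoint 'wpn' on 8 bytes at 'addr' (reads and writes), then enable
+ * monitor debug exceptions.
+ */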
+static void install_wp(uint8_t wpn, uint64_t addr)
+{
+       uint32_t wcr;
+
+       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+       write_dbgwcr(wpn, wcr);
+       write_dbgwvr(wpn, addr);
+
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
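+/* Arm hardware breakpoint 'bpn' on the instruction at 'addr'. */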
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
+{
+       uint32_t bcr;
+
+       bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+       write_dbgbcr(bpn, bcr);
+       write_dbgbvr(bpn, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+                          uint64_t ctx)
+{
+       uint32_t wcr;
+       uint64_t ctx_bcr;
+
+       /* Setup a context-aware breakpoint for Linked Context ID Match */
+       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                 DBGBCR_BT_CTX_LINK;
+       write_dbgbcr(ctx_bp, ctx_bcr);
+       write_dbgbvr(ctx_bp, ctx);
+
+       /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+       wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+             DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+       write_dbgwcr(addr_wp, wcr);
+       write_dbgwvr(addr_wp, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+                      uint64_t ctx)
+{
+       uint32_t addr_bcr, ctx_bcr;
+
+       /* Setup a context-aware breakpoint for Linked Context ID Match */
+       ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                 DBGBCR_BT_CTX_LINK;
+       write_dbgbcr(ctx_bp, ctx_bcr);
+       write_dbgbvr(ctx_bp, ctx);
+
+       /*
+        * Setup a normal breakpoint for Linked Address Match, and link it
+        * to the context-aware breakpoint.
+        */
+       addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+                  DBGBCR_BT_ADDR_LINK_CTX |
+                  ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+       write_dbgbcr(addr_bp, addr_bcr);
+       write_dbgbvr(addr_bp, addr);
+       isb();
+
+       enable_monitor_debug_exceptions();
+}
+
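+/* Enable single-step (MDSCR_EL1.{KDE,SS}) and unmask debug exceptions. */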
+static void install_ss(void)
+{
+       uint32_t mdscr;
+
+       asm volatile("msr daifclr, #8");
+
+       mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+       write_sysreg(mdscr, mdscr_el1);
+       isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+       uint64_t ctx = 0xabcdef;        /* a random context number */
+
+       /* Software-breakpoint */
+       reset_debug_state();
+       asm volatile("sw_bp: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+       /* Hardware-breakpoint */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(hw_bp));
+       asm volatile("hw_bp: nop");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+       /* Hardware-breakpoint + svc */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(bp_svc));
+       asm volatile("bp_svc: svc #0");
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+       GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+       /* Hardware-breakpoint + software-breakpoint */
+       reset_debug_state();
+       install_hw_bp(bpn, PC(bp_brk));
+       asm volatile("bp_brk: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+       /* Watchpoint */
+       reset_debug_state();
+       install_wp(wpn, PC(write_data));
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+       /* Single-step */
+       reset_debug_state();
+       install_ss();
+       ss_idx = 0;
+       asm volatile("ss_start:\n"
+                    "mrs x0, esr_el1\n"
+                    "add x0, x0, #1\n"
+                    "msr daifset, #8\n"
+                    : : : "x0");
+       GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+       GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+       GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+       /* OS Lock does not block software-breakpoint */
+       reset_debug_state();
+       enable_os_lock();
+       sw_bp_addr = 0;
+       asm volatile("sw_bp2: brk #0");
+       GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+       /* OS Lock blocking hardware-breakpoint */
+       reset_debug_state();
+       enable_os_lock();
+       install_hw_bp(bpn, PC(hw_bp2));
+       hw_bp_addr = 0;
+       asm volatile("hw_bp2: nop");
+       GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+       /* OS Lock blocking watchpoint */
+       reset_debug_state();
+       enable_os_lock();
+       write_data = '\0';
+       wp_data_addr = 0;
+       install_wp(wpn, PC(write_data));
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+       /* OS Lock blocking single-step */
+       reset_debug_state();
+       enable_os_lock();
+       ss_addr[0] = 0;
+       install_ss();
+       ss_idx = 0;
+       asm volatile("mrs x0, esr_el1\n\t"
+                    "add x0, x0, #1\n\t"
+                    "msr daifset, #8\n\t"
+                    : : : "x0");
+       GUEST_ASSERT_EQ(ss_addr[0], 0);
+
+       /* Linked hardware-breakpoint */
+       hw_bp_addr = 0;
+       reset_debug_state();
+       install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+       /* Set context id */
+       write_sysreg(ctx, contextidr_el1);
+       isb();
+       asm volatile("hw_bp_ctx: nop");
+       write_sysreg(0, contextidr_el1);
+       GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
+       /* Linked watchpoint */
+       reset_debug_state();
+       install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+       /* Set context id */
+       write_sysreg(ctx, contextidr_el1);
+       isb();
+       write_data = 'x';
+       GUEST_ASSERT_EQ(write_data, 'x');
+       GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+       GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+       sw_bp_addr = regs->pc;
+       regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+       hw_bp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+       wp_data_addr = read_sysreg(far_el1);
+       wp_addr = regs->pc;
+       regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
+       ss_addr[ss_idx++] = regs->pc;
+       regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+       svc_addr = regs->pc;
+}
+
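+/*
+ * Guest side of the single-step test: each iteration writes and reads back
+ * dbgbvr0_el1/dbgwvr0_el1 while userspace single-steps the window between
+ * iter_ss_begin and iter_ss_end.
+ */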
+static void guest_code_ss(int test_cnt)
+{
+       uint64_t i;
+       uint64_t bvr, wvr, w_bvr, w_wvr;
+
+       for (i = 0; i < test_cnt; i++) {
+               /* Bits [1:0] of dbg{b,w}vr are RES0 */
+               w_bvr = i << 2;
+               w_wvr = i << 2;
+
+               /*
+                * Enable Single Step execution.  Note!  This _must_ be a bare
+                * ucall as the ucall() path uses atomic operations to manage
+                * the ucall structures, and the built-in "atomics" are usually
+                * implemented via exclusive access instructions.  The exclusive
+                * monitor is cleared on ERET, and so taking debug exceptions
+                * during a LDREX=>STREX sequence will prevent forward progress
+                * and hang the guest/test.
+                */
+               GUEST_UCALL_NONE();
+
+               /*
+                * Userspace will verify that the pc is as expected during
+                * single step execution between iter_ss_begin and iter_ss_end.
+                */
+               asm volatile("iter_ss_begin:nop\n");
+
+               write_sysreg(w_bvr, dbgbvr0_el1);
+               write_sysreg(w_wvr, dbgwvr0_el1);
+               bvr = read_sysreg(dbgbvr0_el1);
+               wvr = read_sysreg(dbgwvr0_el1);
+
+               /* Userspace disables Single Step when the end is nigh. */
+               asm volatile("iter_ss_end:\n");
+
+               GUEST_ASSERT_EQ(bvr, w_bvr);
+               GUEST_ASSERT_EQ(wvr, w_wvr);
+       }
+       GUEST_DONE();
+}
+
+static int debug_version(uint64_t id_aa64dfr0)
+{
+       return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+}
+
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_BRK64, guest_sw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_SVC64, guest_svc_handler);
+
+       /* Specify bpn/wpn/ctx_bpn to be tested */
+       vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+       pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
+
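+/*
+ * Userspace side of the single-step test: enable KVM_GUESTDBG_SINGLESTEP
+ * around each guest iteration and verify that every KVM_EXIT_DEBUG within
+ * the iter_ss_begin..iter_ss_end window lands on the expected PC.
+ */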
+void test_single_step_from_userspace(int test_cnt)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       struct kvm_run *run;
+       uint64_t pc, cmd;
+       uint64_t test_pc = 0;
+       bool ss_enable = false;
+       struct kvm_guest_debug debug = {};
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+       run = vcpu->run;
+       vcpu_args_set(vcpu, 1, test_cnt);
+
+       while (1) {
+               vcpu_run(vcpu);
+               if (run->exit_reason != KVM_EXIT_DEBUG) {
+                       cmd = get_ucall(vcpu, &uc);
+                       if (cmd == UCALL_ABORT) {
+                               REPORT_GUEST_ASSERT(uc);
+                               /* NOT REACHED */
+                       } else if (cmd == UCALL_DONE) {
+                               break;
+                       }
+
+                       TEST_ASSERT(cmd == UCALL_NONE,
+                                   "Unexpected ucall cmd 0x%lx", cmd);
+
+                       debug.control = KVM_GUESTDBG_ENABLE |
+                                       KVM_GUESTDBG_SINGLESTEP;
+                       ss_enable = true;
+                       vcpu_guest_debug_set(vcpu, &debug);
+                       continue;
+               }
+
+               TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+               /* Check if the current pc is expected. */
+               pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+               TEST_ASSERT(!test_pc || pc == test_pc,
+                           "Unexpected pc 0x%lx (expected 0x%lx)",
+                           pc, test_pc);
+
+               if ((pc + 4) == (uint64_t)&iter_ss_end) {
+                       test_pc = 0;
+                       debug.control = KVM_GUESTDBG_ENABLE;
+                       ss_enable = false;
+                       vcpu_guest_debug_set(vcpu, &debug);
+                       continue;
+               }
+
+               /*
+                * If the current pc is between iter_ss_begin and
+                * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+                * be the current pc + 4.
+                */
+               if ((pc >= (uint64_t)&iter_ss_begin) &&
+                   (pc < (uint64_t)&iter_ss_end))
+                       test_pc = pc + 4;
+               else
+                       test_pc = 0;
+       }
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+       uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+       int b, w, c;
+
+       /* Number of breakpoints */
+       brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+       __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+       /* Number of watchpoints */
+       wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+
+       /* Number of context aware breakpoints */
+       ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+
+       pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+                brp_num, wrp_num, ctx_brp_num);
+
+       /* Number of normal (non-context aware) breakpoints */
+       normal_brp_num = brp_num - ctx_brp_num;
+
+       /* Lowest context aware breakpoint number */
+       ctx_brp_base = normal_brp_num;
+
+       /* Run tests with all supported breakpoints/watchpoints */
+       for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+               for (b = 0; b < normal_brp_num; b++) {
+                       for (w = 0; w < wrp_num; w++)
+                               test_guest_debug_exceptions(b, w, c);
+               }
+       }
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int opt;
+       int ss_iteration = 10000;
+       uint64_t aa64dfr0;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+       __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
+                      "Armv8 debug architecture not supported.");
+       kvm_vm_free(vm);
+
+       while ((opt = getopt(argc, argv, "i:")) != -1) {
+               switch (opt) {
+               case 'i':
+                       ss_iteration = atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       test_guest_debug_exceptions_all(aa64dfr0);
+       test_single_step_from_userspace(ss_iteration);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
new file mode 100644 (file)
index 0000000..d43fb3f
--- /dev/null
@@ -0,0 +1,771 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2 because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+struct feature_id_reg {
+       __u64 reg;
+       __u64 id_reg;
+       __u64 feat_shift;
+       __u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+       {
+               ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               0,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               8,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               8,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               16,
+               1
+       },
+       {
+               ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
+               ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+               16,
+               1
+       }
+};
+
+bool filter_reg(__u64 reg)
+{
+       /*
+        * DEMUX register presence depends on the host's CLIDR_EL1.
+        * This means there's no set of them that we can bless.
+        */
+       if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+               return true;
+
+       return false;
+}
+
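+/*
+ * A register gated by a feature (see feat_id_regs[]) is only expected when
+ * the associated ID register field is at or above the required minimum;
+ * registers not in the table are always considered supported.
+ */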
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       int i, ret;
+       __u64 data, feat_val;
+
+       for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+               if (feat_id_regs[i].reg == reg) {
+                       ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+                       if (ret < 0)
+                               return false;
+
+                       feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+                       return feat_val >= feat_id_regs[i].feat_min;
+               }
+       }
+
+       return true;
+}
+
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       return check_supported_feat_reg(vcpu, reg);
+}
+
+bool check_reject_set(int err)
+{
+       return err == EPERM;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       int feature;
+
+       for_each_sublist(c, s) {
+               if (s->finalize) {
+                       feature = s->feature;
+                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+               }
+       }
+}
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define CORE_REGS_XX_NR_WORDS  2
+#define CORE_SPSR_XX_NR_WORDS  2
+#define CORE_FPREGS_XX_NR_WORDS        4
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 core_off = id & ~REG_MASK, idx;
+
+       /*
+        * core_off is the offset into struct kvm_regs
+        */
+       switch (core_off) {
+       case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+            KVM_REG_ARM_CORE_REG(regs.regs[30]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(regs.sp):
+               return "KVM_REG_ARM_CORE_REG(regs.sp)";
+       case KVM_REG_ARM_CORE_REG(regs.pc):
+               return "KVM_REG_ARM_CORE_REG(regs.pc)";
+       case KVM_REG_ARM_CORE_REG(regs.pstate):
+               return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+       case KVM_REG_ARM_CORE_REG(sp_el1):
+               return "KVM_REG_ARM_CORE_REG(sp_el1)";
+       case KVM_REG_ARM_CORE_REG(elr_el1):
+               return "KVM_REG_ARM_CORE_REG(elr_el1)";
+       case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+            KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+            KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+               idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+       case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+               return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+       }
+
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+static const char *sve_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 sve_off, n, i;
+
+       if (id == KVM_REG_ARM64_SVE_VLS)
+               return "KVM_REG_ARM64_SVE_VLS";
+
+       sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+       i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
+
+       switch (sve_off) {
+       case KVM_REG_ARM64_SVE_ZREG_BASE ...
+            KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
+       case KVM_REG_ARM64_SVE_PREG_BASE ...
+            KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+               n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
+       case KVM_REG_ARM64_SVE_FFR_BASE:
+               TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
+               return "KVM_REG_ARM64_SVE_FFR(0)";
+       }
+
+       return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+       unsigned op0, op1, crn, crm, op2;
+       const char *reg_size = NULL;
+
+       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
+
+       switch (id & KVM_REG_SIZE_MASK) {
+       case KVM_REG_SIZE_U8:
+               reg_size = "KVM_REG_SIZE_U8";
+               break;
+       case KVM_REG_SIZE_U16:
+               reg_size = "KVM_REG_SIZE_U16";
+               break;
+       case KVM_REG_SIZE_U32:
+               reg_size = "KVM_REG_SIZE_U32";
+               break;
+       case KVM_REG_SIZE_U64:
+               reg_size = "KVM_REG_SIZE_U64";
+               break;
+       case KVM_REG_SIZE_U128:
+               reg_size = "KVM_REG_SIZE_U128";
+               break;
+       case KVM_REG_SIZE_U256:
+               reg_size = "KVM_REG_SIZE_U256";
+               break;
+       case KVM_REG_SIZE_U512:
+               reg_size = "KVM_REG_SIZE_U512";
+               break;
+       case KVM_REG_SIZE_U1024:
+               reg_size = "KVM_REG_SIZE_U1024";
+               break;
+       case KVM_REG_SIZE_U2048:
+               reg_size = "KVM_REG_SIZE_U2048";
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+       }
+
+       switch (id & KVM_REG_ARM_COPROC_MASK) {
+       case KVM_REG_ARM_CORE:
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
+               break;
+       case KVM_REG_ARM_DEMUX:
+               TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+                      reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+               break;
+       case KVM_REG_ARM64_SYSREG:
+               op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+               op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+               crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+               crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+               op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+               TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
+               printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+               break;
+       case KVM_REG_ARM_FW:
+               TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+               break;
+       case KVM_REG_ARM_FW_FEAT_BMAP:
+               TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
+               printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+               break;
+       case KVM_REG_ARM64_SVE:
+               printf("\t%s,\n", sve_id_to_str(prefix, id));
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+       }
+}
+
+/*
+ * The original blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and its fixup function have been removed
+ * from the test, as it's unlikely this type of test will be run on a kernel
+ * older than v5.2.)
+ *
+ * The blessed list is up to date with kernel version v6.4 (or so we hope).
+ */
+static __u64 base_regs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+       KVM_REG_ARM_FW_REG(0),          /* KVM_REG_ARM_PSCI_VERSION */
+       KVM_REG_ARM_FW_REG(1),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+       KVM_REG_ARM_FW_REG(2),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+       KVM_REG_ARM_FW_REG(3),          /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(0),        /* KVM_REG_ARM_STD_BMAP */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(1),        /* KVM_REG_ARM_STD_HYP_BMAP */
+       KVM_REG_ARM_FW_FEAT_BMAP_REG(2),        /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+       ARM64_SYS_REG(3, 3, 14, 3, 1),  /* CNTV_CTL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 3, 2),  /* CNTV_CVAL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 0, 2),
+       ARM64_SYS_REG(3, 0, 0, 0, 0),   /* MIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 0, 6),   /* REVIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 1),   /* CLIDR_EL1 */
+       ARM64_SYS_REG(3, 1, 0, 0, 7),   /* AIDR_EL1 */
+       ARM64_SYS_REG(3, 3, 0, 0, 1),   /* CTR_EL0 */
+       ARM64_SYS_REG(2, 0, 0, 0, 4),
+       ARM64_SYS_REG(2, 0, 0, 0, 5),
+       ARM64_SYS_REG(2, 0, 0, 0, 6),
+       ARM64_SYS_REG(2, 0, 0, 0, 7),
+       ARM64_SYS_REG(2, 0, 0, 1, 4),
+       ARM64_SYS_REG(2, 0, 0, 1, 5),
+       ARM64_SYS_REG(2, 0, 0, 1, 6),
+       ARM64_SYS_REG(2, 0, 0, 1, 7),
+       ARM64_SYS_REG(2, 0, 0, 2, 0),   /* MDCCINT_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 2),   /* MDSCR_EL1 */
+       ARM64_SYS_REG(2, 0, 0, 2, 4),
+       ARM64_SYS_REG(2, 0, 0, 2, 5),
+       ARM64_SYS_REG(2, 0, 0, 2, 6),
+       ARM64_SYS_REG(2, 0, 0, 2, 7),
+       ARM64_SYS_REG(2, 0, 0, 3, 4),
+       ARM64_SYS_REG(2, 0, 0, 3, 5),
+       ARM64_SYS_REG(2, 0, 0, 3, 6),
+       ARM64_SYS_REG(2, 0, 0, 3, 7),
+       ARM64_SYS_REG(2, 0, 0, 4, 4),
+       ARM64_SYS_REG(2, 0, 0, 4, 5),
+       ARM64_SYS_REG(2, 0, 0, 4, 6),
+       ARM64_SYS_REG(2, 0, 0, 4, 7),
+       ARM64_SYS_REG(2, 0, 0, 5, 4),
+       ARM64_SYS_REG(2, 0, 0, 5, 5),
+       ARM64_SYS_REG(2, 0, 0, 5, 6),
+       ARM64_SYS_REG(2, 0, 0, 5, 7),
+       ARM64_SYS_REG(2, 0, 0, 6, 4),
+       ARM64_SYS_REG(2, 0, 0, 6, 5),
+       ARM64_SYS_REG(2, 0, 0, 6, 6),
+       ARM64_SYS_REG(2, 0, 0, 6, 7),
+       ARM64_SYS_REG(2, 0, 0, 7, 4),
+       ARM64_SYS_REG(2, 0, 0, 7, 5),
+       ARM64_SYS_REG(2, 0, 0, 7, 6),
+       ARM64_SYS_REG(2, 0, 0, 7, 7),
+       ARM64_SYS_REG(2, 0, 0, 8, 4),
+       ARM64_SYS_REG(2, 0, 0, 8, 5),
+       ARM64_SYS_REG(2, 0, 0, 8, 6),
+       ARM64_SYS_REG(2, 0, 0, 8, 7),
+       ARM64_SYS_REG(2, 0, 0, 9, 4),
+       ARM64_SYS_REG(2, 0, 0, 9, 5),
+       ARM64_SYS_REG(2, 0, 0, 9, 6),
+       ARM64_SYS_REG(2, 0, 0, 9, 7),
+       ARM64_SYS_REG(2, 0, 0, 10, 4),
+       ARM64_SYS_REG(2, 0, 0, 10, 5),
+       ARM64_SYS_REG(2, 0, 0, 10, 6),
+       ARM64_SYS_REG(2, 0, 0, 10, 7),
+       ARM64_SYS_REG(2, 0, 0, 11, 4),
+       ARM64_SYS_REG(2, 0, 0, 11, 5),
+       ARM64_SYS_REG(2, 0, 0, 11, 6),
+       ARM64_SYS_REG(2, 0, 0, 11, 7),
+       ARM64_SYS_REG(2, 0, 0, 12, 4),
+       ARM64_SYS_REG(2, 0, 0, 12, 5),
+       ARM64_SYS_REG(2, 0, 0, 12, 6),
+       ARM64_SYS_REG(2, 0, 0, 12, 7),
+       ARM64_SYS_REG(2, 0, 0, 13, 4),
+       ARM64_SYS_REG(2, 0, 0, 13, 5),
+       ARM64_SYS_REG(2, 0, 0, 13, 6),
+       ARM64_SYS_REG(2, 0, 0, 13, 7),
+       ARM64_SYS_REG(2, 0, 0, 14, 4),
+       ARM64_SYS_REG(2, 0, 0, 14, 5),
+       ARM64_SYS_REG(2, 0, 0, 14, 6),
+       ARM64_SYS_REG(2, 0, 0, 14, 7),
+       ARM64_SYS_REG(2, 0, 0, 15, 4),
+       ARM64_SYS_REG(2, 0, 0, 15, 5),
+       ARM64_SYS_REG(2, 0, 0, 15, 6),
+       ARM64_SYS_REG(2, 0, 0, 15, 7),
+       ARM64_SYS_REG(2, 0, 1, 1, 4),   /* OSLSR_EL1 */
+       ARM64_SYS_REG(2, 4, 0, 7, 0),   /* DBGVCR32_EL2 */
+       ARM64_SYS_REG(3, 0, 0, 0, 5),   /* MPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 0),   /* ID_PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 1),   /* ID_PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 2),   /* ID_DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 3),   /* ID_AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 4),   /* ID_MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 5),   /* ID_MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 6),   /* ID_MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 1, 7),   /* ID_MMFR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 0),   /* ID_ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 1),   /* ID_ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 2),   /* ID_ISAR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 3),   /* ID_ISAR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 4),   /* ID_ISAR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 5),   /* ID_ISAR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 6),   /* ID_MMFR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 2, 7),   /* ID_ISAR6_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 0),   /* MVFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 1),   /* MVFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 2),   /* MVFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 3),
+       ARM64_SYS_REG(3, 0, 0, 3, 4),   /* ID_PFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 5),   /* ID_DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 6),   /* ID_MMFR5_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 3, 7),
+       ARM64_SYS_REG(3, 0, 0, 4, 0),   /* ID_AA64PFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 1),   /* ID_AA64PFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 2),   /* ID_AA64PFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 3),
+       ARM64_SYS_REG(3, 0, 0, 4, 4),   /* ID_AA64ZFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 5),   /* ID_AA64SMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 4, 6),
+       ARM64_SYS_REG(3, 0, 0, 4, 7),
+       ARM64_SYS_REG(3, 0, 0, 5, 0),   /* ID_AA64DFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 1),   /* ID_AA64DFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 2),
+       ARM64_SYS_REG(3, 0, 0, 5, 3),
+       ARM64_SYS_REG(3, 0, 0, 5, 4),   /* ID_AA64AFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 5),   /* ID_AA64AFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 5, 6),
+       ARM64_SYS_REG(3, 0, 0, 5, 7),
+       ARM64_SYS_REG(3, 0, 0, 6, 0),   /* ID_AA64ISAR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 1),   /* ID_AA64ISAR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 2),   /* ID_AA64ISAR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 6, 3),
+       ARM64_SYS_REG(3, 0, 0, 6, 4),
+       ARM64_SYS_REG(3, 0, 0, 6, 5),
+       ARM64_SYS_REG(3, 0, 0, 6, 6),
+       ARM64_SYS_REG(3, 0, 0, 6, 7),
+       ARM64_SYS_REG(3, 0, 0, 7, 0),   /* ID_AA64MMFR0_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 1),   /* ID_AA64MMFR1_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 2),   /* ID_AA64MMFR2_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 3),   /* ID_AA64MMFR3_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 4),   /* ID_AA64MMFR4_EL1 */
+       ARM64_SYS_REG(3, 0, 0, 7, 5),
+       ARM64_SYS_REG(3, 0, 0, 7, 6),
+       ARM64_SYS_REG(3, 0, 0, 7, 7),
+       ARM64_SYS_REG(3, 0, 1, 0, 0),   /* SCTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 1),   /* ACTLR_EL1 */
+       ARM64_SYS_REG(3, 0, 1, 0, 2),   /* CPACR_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 0),   /* TTBR0_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 1),   /* TTBR1_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 2),   /* TCR_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 0, 3),   /* TCR2_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 0),   /* AFSR0_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 1, 1),   /* AFSR1_EL1 */
+       ARM64_SYS_REG(3, 0, 5, 2, 0),   /* ESR_EL1 */
+       ARM64_SYS_REG(3, 0, 6, 0, 0),   /* FAR_EL1 */
+       ARM64_SYS_REG(3, 0, 7, 4, 0),   /* PAR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 0),  /* MAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 2),  /* PIRE0_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 3),  /* PIR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 2, 4),  /* POR_EL1 */
+       ARM64_SYS_REG(3, 0, 10, 3, 0),  /* AMAIR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 0, 0),  /* VBAR_EL1 */
+       ARM64_SYS_REG(3, 0, 12, 1, 1),  /* DISR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 1),  /* CONTEXTIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 13, 0, 4),  /* TPIDR_EL1 */
+       ARM64_SYS_REG(3, 0, 14, 1, 0),  /* CNTKCTL_EL1 */
+       ARM64_SYS_REG(3, 2, 0, 0, 0),   /* CSSELR_EL1 */
+       ARM64_SYS_REG(3, 3, 10, 2, 4),  /* POR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 2),  /* TPIDR_EL0 */
+       ARM64_SYS_REG(3, 3, 13, 0, 3),  /* TPIDRRO_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 0, 1),  /* CNTPCT_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 2, 1),  /* CNTP_CTL_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 2, 2),  /* CNTP_CVAL_EL0 */
+       ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
+       ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+       ARM64_SYS_REG(3, 0, 9, 14, 1),  /* PMINTENSET_EL1 */
+       ARM64_SYS_REG(3, 0, 9, 14, 2),  /* PMINTENCLR_EL1 */
+       ARM64_SYS_REG(3, 3, 9, 12, 0),  /* PMCR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 1),  /* PMCNTENSET_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 2),  /* PMCNTENCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 3),  /* PMOVSCLR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 4),  /* PMSWINC_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 12, 5),  /* PMSELR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 13, 0),  /* PMCCNTR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 0),  /* PMUSERENR_EL0 */
+       ARM64_SYS_REG(3, 3, 9, 14, 3),  /* PMOVSSET_EL0 */
+       ARM64_SYS_REG(3, 3, 14, 8, 0),
+       ARM64_SYS_REG(3, 3, 14, 8, 1),
+       ARM64_SYS_REG(3, 3, 14, 8, 2),
+       ARM64_SYS_REG(3, 3, 14, 8, 3),
+       ARM64_SYS_REG(3, 3, 14, 8, 4),
+       ARM64_SYS_REG(3, 3, 14, 8, 5),
+       ARM64_SYS_REG(3, 3, 14, 8, 6),
+       ARM64_SYS_REG(3, 3, 14, 8, 7),
+       ARM64_SYS_REG(3, 3, 14, 9, 0),
+       ARM64_SYS_REG(3, 3, 14, 9, 1),
+       ARM64_SYS_REG(3, 3, 14, 9, 2),
+       ARM64_SYS_REG(3, 3, 14, 9, 3),
+       ARM64_SYS_REG(3, 3, 14, 9, 4),
+       ARM64_SYS_REG(3, 3, 14, 9, 5),
+       ARM64_SYS_REG(3, 3, 14, 9, 6),
+       ARM64_SYS_REG(3, 3, 14, 9, 7),
+       ARM64_SYS_REG(3, 3, 14, 10, 0),
+       ARM64_SYS_REG(3, 3, 14, 10, 1),
+       ARM64_SYS_REG(3, 3, 14, 10, 2),
+       ARM64_SYS_REG(3, 3, 14, 10, 3),
+       ARM64_SYS_REG(3, 3, 14, 10, 4),
+       ARM64_SYS_REG(3, 3, 14, 10, 5),
+       ARM64_SYS_REG(3, 3, 14, 10, 6),
+       ARM64_SYS_REG(3, 3, 14, 10, 7),
+       ARM64_SYS_REG(3, 3, 14, 11, 0),
+       ARM64_SYS_REG(3, 3, 14, 11, 1),
+       ARM64_SYS_REG(3, 3, 14, 11, 2),
+       ARM64_SYS_REG(3, 3, 14, 11, 3),
+       ARM64_SYS_REG(3, 3, 14, 11, 4),
+       ARM64_SYS_REG(3, 3, 14, 11, 5),
+       ARM64_SYS_REG(3, 3, 14, 11, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 0),
+       ARM64_SYS_REG(3, 3, 14, 12, 1),
+       ARM64_SYS_REG(3, 3, 14, 12, 2),
+       ARM64_SYS_REG(3, 3, 14, 12, 3),
+       ARM64_SYS_REG(3, 3, 14, 12, 4),
+       ARM64_SYS_REG(3, 3, 14, 12, 5),
+       ARM64_SYS_REG(3, 3, 14, 12, 6),
+       ARM64_SYS_REG(3, 3, 14, 12, 7),
+       ARM64_SYS_REG(3, 3, 14, 13, 0),
+       ARM64_SYS_REG(3, 3, 14, 13, 1),
+       ARM64_SYS_REG(3, 3, 14, 13, 2),
+       ARM64_SYS_REG(3, 3, 14, 13, 3),
+       ARM64_SYS_REG(3, 3, 14, 13, 4),
+       ARM64_SYS_REG(3, 3, 14, 13, 5),
+       ARM64_SYS_REG(3, 3, 14, 13, 6),
+       ARM64_SYS_REG(3, 3, 14, 13, 7),
+       ARM64_SYS_REG(3, 3, 14, 14, 0),
+       ARM64_SYS_REG(3, 3, 14, 14, 1),
+       ARM64_SYS_REG(3, 3, 14, 14, 2),
+       ARM64_SYS_REG(3, 3, 14, 14, 3),
+       ARM64_SYS_REG(3, 3, 14, 14, 4),
+       ARM64_SYS_REG(3, 3, 14, 14, 5),
+       ARM64_SYS_REG(3, 3, 14, 14, 6),
+       ARM64_SYS_REG(3, 3, 14, 14, 7),
+       ARM64_SYS_REG(3, 3, 14, 15, 0),
+       ARM64_SYS_REG(3, 3, 14, 15, 1),
+       ARM64_SYS_REG(3, 3, 14, 15, 2),
+       ARM64_SYS_REG(3, 3, 14, 15, 3),
+       ARM64_SYS_REG(3, 3, 14, 15, 4),
+       ARM64_SYS_REG(3, 3, 14, 15, 5),
+       ARM64_SYS_REG(3, 3, 14, 15, 6),
+       ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+};
+
+static __u64 vregs[] = {
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+       KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+
+static __u64 sve_regs[] = {
+       KVM_REG_ARM64_SVE_VLS,
+       KVM_REG_ARM64_SVE_ZREG(0, 0),
+       KVM_REG_ARM64_SVE_ZREG(1, 0),
+       KVM_REG_ARM64_SVE_ZREG(2, 0),
+       KVM_REG_ARM64_SVE_ZREG(3, 0),
+       KVM_REG_ARM64_SVE_ZREG(4, 0),
+       KVM_REG_ARM64_SVE_ZREG(5, 0),
+       KVM_REG_ARM64_SVE_ZREG(6, 0),
+       KVM_REG_ARM64_SVE_ZREG(7, 0),
+       KVM_REG_ARM64_SVE_ZREG(8, 0),
+       KVM_REG_ARM64_SVE_ZREG(9, 0),
+       KVM_REG_ARM64_SVE_ZREG(10, 0),
+       KVM_REG_ARM64_SVE_ZREG(11, 0),
+       KVM_REG_ARM64_SVE_ZREG(12, 0),
+       KVM_REG_ARM64_SVE_ZREG(13, 0),
+       KVM_REG_ARM64_SVE_ZREG(14, 0),
+       KVM_REG_ARM64_SVE_ZREG(15, 0),
+       KVM_REG_ARM64_SVE_ZREG(16, 0),
+       KVM_REG_ARM64_SVE_ZREG(17, 0),
+       KVM_REG_ARM64_SVE_ZREG(18, 0),
+       KVM_REG_ARM64_SVE_ZREG(19, 0),
+       KVM_REG_ARM64_SVE_ZREG(20, 0),
+       KVM_REG_ARM64_SVE_ZREG(21, 0),
+       KVM_REG_ARM64_SVE_ZREG(22, 0),
+       KVM_REG_ARM64_SVE_ZREG(23, 0),
+       KVM_REG_ARM64_SVE_ZREG(24, 0),
+       KVM_REG_ARM64_SVE_ZREG(25, 0),
+       KVM_REG_ARM64_SVE_ZREG(26, 0),
+       KVM_REG_ARM64_SVE_ZREG(27, 0),
+       KVM_REG_ARM64_SVE_ZREG(28, 0),
+       KVM_REG_ARM64_SVE_ZREG(29, 0),
+       KVM_REG_ARM64_SVE_ZREG(30, 0),
+       KVM_REG_ARM64_SVE_ZREG(31, 0),
+       KVM_REG_ARM64_SVE_PREG(0, 0),
+       KVM_REG_ARM64_SVE_PREG(1, 0),
+       KVM_REG_ARM64_SVE_PREG(2, 0),
+       KVM_REG_ARM64_SVE_PREG(3, 0),
+       KVM_REG_ARM64_SVE_PREG(4, 0),
+       KVM_REG_ARM64_SVE_PREG(5, 0),
+       KVM_REG_ARM64_SVE_PREG(6, 0),
+       KVM_REG_ARM64_SVE_PREG(7, 0),
+       KVM_REG_ARM64_SVE_PREG(8, 0),
+       KVM_REG_ARM64_SVE_PREG(9, 0),
+       KVM_REG_ARM64_SVE_PREG(10, 0),
+       KVM_REG_ARM64_SVE_PREG(11, 0),
+       KVM_REG_ARM64_SVE_PREG(12, 0),
+       KVM_REG_ARM64_SVE_PREG(13, 0),
+       KVM_REG_ARM64_SVE_PREG(14, 0),
+       KVM_REG_ARM64_SVE_PREG(15, 0),
+       KVM_REG_ARM64_SVE_FFR(0),
+       ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
+};
+
+static __u64 sve_rejects_set[] = {
+       KVM_REG_ARM64_SVE_VLS,
+};
+
+static __u64 pauth_addr_regs[] = {
+       ARM64_SYS_REG(3, 0, 2, 1, 0),   /* APIAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 1),   /* APIAKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 2),   /* APIBKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 1, 3),   /* APIBKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 0),   /* APDAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 1),   /* APDAKEYHI_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 2),   /* APDBKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 2, 3)    /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+       ARM64_SYS_REG(3, 0, 2, 3, 0),   /* APGAKEYLO_EL1 */
+       ARM64_SYS_REG(3, 0, 2, 3, 1),   /* APGAKEYHI_EL1 */
+};
+
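+/*
+ * Each sublist pairs a register array with the KVM capability and vCPU
+ * feature (if any) required to expose those registers.
+ */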
+#define BASE_SUBLIST \
+       { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+       { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+       { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+         .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+         .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST                                                  \
+       {                                                               \
+               .name           = "pauth_address",                      \
+               .capability     = KVM_CAP_ARM_PTRAUTH_ADDRESS,          \
+               .feature        = KVM_ARM_VCPU_PTRAUTH_ADDRESS,         \
+               .regs           = pauth_addr_regs,                      \
+               .regs_n         = ARRAY_SIZE(pauth_addr_regs),          \
+       },                                                              \
+       {                                                               \
+               .name           = "pauth_generic",                      \
+               .capability     = KVM_CAP_ARM_PTRAUTH_GENERIC,          \
+               .feature        = KVM_ARM_VCPU_PTRAUTH_GENERIC,         \
+               .regs           = pauth_generic_regs,                   \
+               .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
+       }
+
+static struct vcpu_reg_list vregs_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list vregs_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list sve_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list sve_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVE_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list pauth_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PAUTH_SUBLIST,
+       {0},
+       },
+};
+static struct vcpu_reg_list pauth_pmu_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       VREGS_SUBLIST,
+       PAUTH_SUBLIST,
+       PMU_SUBLIST,
+       {0},
+       },
+};
+
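+/*
+ * All vCPU configurations covered by the test. Note that the SVE configs
+ * carry the SVE sublist in place of the FP/SIMD vregs sublist.
+ */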
+struct vcpu_reg_list *vcpu_configs[] = {
+       &vregs_config,
+       &vregs_pmu_config,
+       &sve_config,
+       &sve_pmu_config,
+       &pauth_config,
+       &pauth_pmu_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
new file mode 100644 (file)
index 0000000..ec54ec7
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the arm64 pseudo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionalities that are exposed
+ * via the pseudo-firmware bitmap registers. This includes the registers'
+ * read/write behavior before and after the VM has started, and whether the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from KVM userspace, respectively.
+ */
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX           0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX       0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX    1
+
+struct kvm_fw_reg_info {
+       uint64_t reg;           /* Register definition */
+       uint64_t max_feat_bit;  /* Bit that represents the upper limit of the feature-map */
+};
+
+#define FW_REG_INFO(r)                 \
+       {                                       \
+               .reg = r,                       \
+               .max_feat_bit = r##_BIT_MAX,    \
+       }
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+       FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+       FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+       FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+};
+
+enum test_stage {
+       TEST_STAGE_REG_IFACE,
+       TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+       TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+       TEST_STAGE_HVC_IFACE_FALSE_INFO,
+       TEST_STAGE_END,
+};
+
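+/* Synced to the guest via sync_global_to_guest() as the test advances. */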
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+       uint32_t func_id;
+       uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1)   \
+       {                       \
+               .func_id = f,   \
+               .arg1 = a1,     \
+       }
+
+static const struct test_hvc_info hvc_info[] = {
+       /* KVM_REG_ARM_STD_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+       /* KVM_REG_ARM_STD_HYP_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+       /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+                       ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+       TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+       /* Feature support check against a different family of hypercalls */
+       TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+       TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+       TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+       unsigned int i;
+       struct arm_smccc_res res;
+       unsigned int hvc_info_arr_sz;
+
+       hvc_info_arr_sz =
+               hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+       for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+               memset(&res, 0, sizeof(res));
+               smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+               switch (stage) {
+               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
+                       break;
+               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
+                       break;
+               default:
+                       GUEST_FAIL("Unexpected stage = %u", stage);
+               }
+       }
+}
+
+static void guest_code(void)
+{
+       while (stage != TEST_STAGE_END) {
+               switch (stage) {
+               case TEST_STAGE_REG_IFACE:
+                       break;
+               case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+                       guest_test_hvc(hvc_info);
+                       break;
+               case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+                       guest_test_hvc(false_hvc_info);
+                       break;
+               default:
+                       GUEST_FAIL("Unexpected stage = %u", stage);
+               }
+
+               GUEST_SYNC(stage);
+       }
+
+       GUEST_DONE();
+}
+
+struct st_time {
+       uint32_t rev;
+       uint32_t attr;
+       uint64_t st_time;
+};
+
+#define STEAL_TIME_SIZE                ((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE            (1 << 30)
+
+static void steal_time_init(struct kvm_vcpu *vcpu)
+{
+       uint64_t st_ipa = (ulong)ST_GPA_BASE;
+       unsigned int gpages;
+
+       gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+       vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+       vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+                            KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
+{
+       uint64_t val;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+               /* The first 'read' should return the upper limit of the supported features */
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
+                       "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+                       reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+
+               /* Test a 'write' by disabling all the features of the register map */
+               ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+               TEST_ASSERT(ret == 0,
+                       "Failed to clear all the features of reg: 0x%lx; ret: %d",
+                       reg_info->reg, errno);
+
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == 0,
+                       "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+
+               /*
+                * Test enabling a feature that's not supported.
+                * Avoid this check if all the bits are occupied.
+                */
+               if (reg_info->max_feat_bit < 63) {
+                       ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                       "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
+                       errno, reg_info->reg);
+               }
+       }
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
+{
+       uint64_t val;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+               const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+               /*
+                * Before starting the VM, the test clears all the bits.
+                * Check if that's still the case.
+                */
+               val = vcpu_get_reg(vcpu, reg_info->reg);
+               TEST_ASSERT(val == 0,
+                       "Expected all the features to be cleared for reg: 0x%lx",
+                       reg_info->reg);
+
+               /*
+                * Since the VM has run at least once, KVM shouldn't allow modification of
+                * the registers and should return EBUSY. Set the registers and check for
+                * the expected errno.
+                */
+               ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+               TEST_ASSERT(ret != 0 && errno == EBUSY,
+               "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
+               errno, reg_info->reg);
+       }
+}
+
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+       steal_time_init(*vcpu);
+
+       return vm;
+}
+
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
+{
+       int prev_stage = stage;
+
+       pr_debug("Stage: %d\n", prev_stage);
+
+       /* Sync the stage early, the VM might be freed below. */
+       stage++;
+       sync_global_to_guest(*vm, stage);
+
+       switch (prev_stage) {
+       case TEST_STAGE_REG_IFACE:
+               test_fw_regs_after_vm_start(*vcpu);
+               break;
+       case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+               /* Start a new VM so that all the features are now enabled by default */
+               kvm_vm_free(*vm);
+               *vm = test_vm_create(vcpu);
+               break;
+       case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+       case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+               break;
+       default:
+               TEST_FAIL("Unknown test stage: %d", prev_stage);
+       }
+}
+
+static void test_run(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       bool guest_done = false;
+
+       vm = test_vm_create(&vcpu);
+
+       test_fw_regs_before_vm_start(vcpu);
+
+       while (!guest_done) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       test_guest_stage(&vm, &vcpu);
+                       break;
+               case UCALL_DONE:
+                       guest_done = true;
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unexpected guest exit");
+               }
+       }
+
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       test_run();
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
new file mode 100644 (file)
index 0000000..8b7a80a
--- /dev/null
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mmio_abort - Tests for userspace MMIO abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
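+/* GPA that is identity-mapped in the guest but not backed by any memslot. */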
+#define MMIO_ADDR      0x8000000ULL
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+       u64 esr = read_sysreg(esr_el1);
+
+       GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+       GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+       GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+       GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+                                                 handler_fn dabt_handler)
+{
+       struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(*vcpu);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+       virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+       return vm;
+}
+
+static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events = {};
+
+       events.exception.ext_dabt_pending = true;
+       vcpu_events_set(vcpu, &events);
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
+
+extern char test_mmio_abort_insn;
+
+static void test_mmio_abort_guest(void)
+{
+       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+       asm volatile("test_mmio_abort_insn:\n\t"
+                    "ldr x0, [%0]\n\t"
+                    : : "r" (MMIO_ADDR) : "x0", "memory");
+
+       GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+                                                       expect_sea_handler);
+       struct kvm_run *run = vcpu->run;
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+       TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+       TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+       TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+       vcpu_inject_extabt(vcpu);
+       vcpu_run_expect_done(vcpu);
+       kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+       WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+       asm volatile("test_mmio_nisv_insn:\n\t"
+                    "ldr x0, [%0], #8\n\t"
+                    : : "r" (MMIO_ADDR) : "x0", "memory");
+
+       GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+                                                       unexpected_dabt_handler);
+
+       TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+       TEST_ASSERT_EQ(errno, ENOSYS);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+                                                       expect_sea_handler);
+       struct kvm_run *run = vcpu->run;
+
+       vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+       TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+       vcpu_inject_extabt(vcpu);
+       vcpu_run_expect_done(vcpu);
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       test_mmio_abort();
+       test_mmio_nisv();
+       test_mmio_nisv_abort();
+}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
new file mode 100644 (file)
index 0000000..ebd7043
--- /dev/null
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
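+/* Set by the UNDEF handler; cleared before each sysreg access below. */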
+static volatile bool handled;
+
+#define __check_sr_read(r)                                     \
+       ({                                                      \
+               uint64_t val;                                   \
+                                                               \
+               handled = false;                                \
+               dsb(sy);                                        \
+               val = read_sysreg_s(SYS_ ## r);                 \
+               val;                                            \
+       })
+
+#define __check_sr_write(r)                                    \
+       do {                                                    \
+               handled = false;                                \
+               dsb(sy);                                        \
+               write_sysreg_s(0, SYS_ ## r);                   \
+               isb();                                          \
+       } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r)                                       \
+       do {                                                    \
+               __check_sr_read(r);                             \
+               __GUEST_ASSERT(handled, #r " no read trap");    \
+       } while(0)
+
+#define check_sr_write(r)                                      \
+       do {                                                    \
+               __check_sr_write(r);                            \
+               __GUEST_ASSERT(handled, #r " no write trap");   \
+       } while(0)
+
+#define check_sr_rw(r)                         \
+       do {                                    \
+               check_sr_read(r);               \
+               check_sr_write(r);              \
+       } while(0)
+
+static void guest_code(void)
+{
+       uint64_t val;
+
+       /*
+        * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+        * hidden the feature at runtime without any other userspace action.
+        */
+       __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+                                read_sysreg(id_aa64pfr0_el1)) == 0,
+                      "GICv3 wrongly advertised");
+
+       /*
+        * Access all GICv3 registers, and fail if we don't get an UNDEF.
+        * Note that we happily access all the APxRn registers without
+        * checking their existence, as all we want to see is a failure.
+        */
+       check_sr_rw(ICC_PMR_EL1);
+       check_sr_read(ICC_IAR0_EL1);
+       check_sr_write(ICC_EOIR0_EL1);
+       check_sr_rw(ICC_HPPIR0_EL1);
+       check_sr_rw(ICC_BPR0_EL1);
+       check_sr_rw(ICC_AP0R0_EL1);
+       check_sr_rw(ICC_AP0R1_EL1);
+       check_sr_rw(ICC_AP0R2_EL1);
+       check_sr_rw(ICC_AP0R3_EL1);
+       check_sr_rw(ICC_AP1R0_EL1);
+       check_sr_rw(ICC_AP1R1_EL1);
+       check_sr_rw(ICC_AP1R2_EL1);
+       check_sr_rw(ICC_AP1R3_EL1);
+       check_sr_write(ICC_DIR_EL1);
+       check_sr_read(ICC_RPR_EL1);
+       check_sr_write(ICC_SGI1R_EL1);
+       check_sr_write(ICC_ASGI1R_EL1);
+       check_sr_write(ICC_SGI0R_EL1);
+       check_sr_read(ICC_IAR1_EL1);
+       check_sr_write(ICC_EOIR1_EL1);
+       check_sr_rw(ICC_HPPIR1_EL1);
+       check_sr_rw(ICC_BPR1_EL1);
+       check_sr_rw(ICC_CTLR_EL1);
+       check_sr_rw(ICC_IGRPEN0_EL1);
+       check_sr_rw(ICC_IGRPEN1_EL1);
+
+       /*
+        * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+        * be RAO/WI. Engage in non-fatal accesses, starting with a
+        * write of 0 to try to disable SRE, and see whether it
+        * sticks.
+        */
+       __check_sr_write(ICC_SRE_EL1);
+       if (!handled)
+               GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+       val = __check_sr_read(ICC_SRE_EL1);
+       if (!handled) {
+               __GUEST_ASSERT((val & BIT(0)),
+                              "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+               GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+       /* Success, we've gracefully exploded! */
+       handled = true;
+       regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       printf("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Create a VM without a GICv3 */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+       test_run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t pfr0;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+       __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+                      "GICv3 not supported.");
+       kvm_vm_free(vm);
+
+       test_guest_no_gicv3();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
new file mode 100644 (file)
index 0000000..ec33a8f
--- /dev/null
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA                               0xc0000000
+#define TEST_EXEC_GVA                          (TEST_GVA + 0x8)
+#define TEST_PTE_GVA                           0xb0000000
+#define TEST_DATA                              0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
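+/* Commands the guest issues to the host via GUEST_SYNC(cmd). */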
+#define CMD_NONE                               (0)
+#define CMD_SKIP_TEST                          (1ULL << 1)
+#define CMD_HOLE_PT                            (1ULL << 2)
+#define CMD_HOLE_DATA                          (1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG           (1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG                (1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG                (1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG     (1ULL << 7)
+#define CMD_SET_PTE_AF                         (1ULL << 8)
+
+#define PREPARE_FN_NR                          10
+#define CHECK_FN_NR                            10
+
+static struct event_cnt {
+       int mmio_exits;
+       int fail_vcpu_runs;
+       int uffd_faults;
+       /* uffd_faults is incremented from multiple threads. */
+       pthread_mutex_t uffd_faults_mutex;
+} events;
+
+struct test_desc {
+       const char *name;
+       uint64_t mem_mark_cmd;
+       /* Skip the test if any prepare function returns false */
+       bool (*guest_prepare[PREPARE_FN_NR])(void);
+       void (*guest_test)(void);
+       void (*guest_test_check[CHECK_FN_NR])(void);
+       uffd_handler_t uffd_pt_handler;
+       uffd_handler_t uffd_data_handler;
+       void (*dabt_handler)(struct ex_regs *regs);
+       void (*iabt_handler)(struct ex_regs *regs);
+       void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+       void (*fail_vcpu_run_handler)(int ret);
+       uint32_t pt_memslot_flags;
+       uint32_t data_memslot_flags;
+       bool skip;
+       struct event_cnt expected_events;
+};
+
+struct test_params {
+       enum vm_mem_backing_src_type src_type;
+       struct test_desc *test_desc;
+};
+
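+/* Invalidate any TLB entries for vaddr (by VA, all ASIDs, inner shareable). */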
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+       uint64_t page = vaddr >> 12;
+
+       dsb(ishst);
+       asm volatile("tlbi vaae1is, %0" :: "r" (page));
+       dsb(ish);
+       isb();
+}
+
+static void guest_write64(void)
+{
+       uint64_t val;
+
+       WRITE_ONCE(*guest_test_memory, TEST_DATA);
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+       uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+       uint64_t atomic;
+
+       atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+       return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+       uint64_t dczid = read_sysreg(dczid_el0);
+       uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+
+       return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+       uint64_t val;
+
+       GUEST_ASSERT(guest_check_lse());
+       asm volatile(".arch_extension lse\n"
+                    "casal %0, %1, [%2]\n"
+                    :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+       uint64_t val;
+
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+       uint64_t par;
+
+       asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+       isb();
+       par = read_sysreg(par_el1);
+
+       /* PAR_EL1.F (bit 0) indicates whether the AT was successful */
+       GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+       uint16_t val;
+
+       asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+       dsb(ish);
+       val = READ_ONCE(*guest_test_memory);
+       GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * KVM must take special care with those: they should still count as accesses
+ * for dirty logging or user-faulting, but should be handled differently on
+ * MMIO.
+ */
+static void guest_ld_preidx(void)
+{
+       uint64_t val;
+       uint64_t addr = TEST_GVA - 8;
+
+       /*
+        * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+        * in a gap between memslots not backed by anything.
+        */
+       asm volatile("ldr %0, [%1, #8]!"
+                    : "=r" (val), "+r" (addr));
+       GUEST_ASSERT_EQ(val, 0);
+       GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+       uint64_t val = TEST_DATA;
+       uint64_t addr = TEST_GVA - 8;
+
+       asm volatile("str %0, [%1, #8]!"
+                    : "+r" (val), "+r" (addr));
+
+       GUEST_ASSERT_EQ(addr, TEST_GVA);
+       val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+       uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+       uint64_t hadbs, tcr;
+
+       /* Skip if HA is not supported. */
+       hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+       if (hadbs == 0)
+               return false;
+
+       tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+       write_sysreg(tcr, tcr_el1);
+       isb();
+
+       return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+       *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+       flush_tlb_page(TEST_GVA);
+
+       return true;
+}
+
+static void guest_check_pte_af(void)
+{
+       dsb(ish);
+       GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_check_write_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+       GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_exec(void)
+{
+       int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+       int ret;
+
+       ret = code();
+       GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+       bool (*prepare_fn)(void);
+       int i;
+
+       for (i = 0; i < PREPARE_FN_NR; i++) {
+               prepare_fn = test->guest_prepare[i];
+               if (prepare_fn && !prepare_fn())
+                       return false;
+       }
+
+       return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+       void (*check_fn)(void);
+       int i;
+
+       for (i = 0; i < CHECK_FN_NR; i++) {
+               check_fn = test->guest_test_check[i];
+               if (check_fn)
+                       check_fn();
+       }
+}
+
+static void guest_code(struct test_desc *test)
+{
+       if (!guest_prepare(test))
+               GUEST_SYNC(CMD_SKIP_TEST);
+
+       GUEST_SYNC(test->mem_mark_cmd);
+
+       if (test->guest_test)
+               test->guest_test();
+
+       guest_test_check(test);
+       GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
+}
+
+static struct uffd_args {
+       char *copy;
+       void *hva;
+       uint64_t paging_size;
+} pt_args, data_args;
+
+/* Handle a MISSING userfaultfd fault by UFFDIO_COPYing the saved data back in. */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+                               struct uffd_args *args)
+{
+       uint64_t addr = msg->arg.pagefault.address;
+       uint64_t flags = msg->arg.pagefault.flags;
+       struct uffdio_copy copy;
+       int ret;
+
+       TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+                   "The only expected UFFD mode is MISSING");
+       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+
+       pr_debug("uffd fault: addr=%p write=%d\n",
+                (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+       copy.src = (uint64_t)args->copy;
+       copy.dst = addr;
+       copy.len = args->paging_size;
+       copy.mode = 0;
+
+       ret = ioctl(uffd, UFFDIO_COPY, &copy);
+       if (ret == -1) {
+               pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+                       addr, errno);
+               return ret;
+       }
+
+       pthread_mutex_lock(&events.uffd_faults_mutex);
+       events.uffd_faults += 1;
+       pthread_mutex_unlock(&events.uffd_faults_mutex);
+       return 0;
+}
+
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       return uffd_generic_handler(mode, uffd, msg, &pt_args);
+}
+
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       return uffd_generic_handler(mode, uffd, msg, &data_args);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+                           struct uffd_args *args)
+{
+       args->hva = (void *)region->region.userspace_addr;
+       args->paging_size = region->region.memory_size;
+
+       args->copy = malloc(args->paging_size);
+       TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+       memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+                      struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+       struct test_desc *test = p->test_desc;
+       int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+       setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+       *pt_uffd = NULL;
+       if (test->uffd_pt_handler)
+               *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+                                                   pt_args.hva,
+                                                   pt_args.paging_size,
+                                                   1, test->uffd_pt_handler);
+
+       *data_uffd = NULL;
+       if (test->uffd_data_handler)
+               *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+                                                     data_args.hva,
+                                                     data_args.paging_size,
+                                                     1, test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+                     struct uffd_desc *data_uffd)
+{
+       if (test->uffd_pt_handler)
+               uffd_stop_demand_paging(pt_uffd);
+       if (test->uffd_data_handler)
+               uffd_stop_demand_paging(data_uffd);
+
+       free(pt_args.copy);
+       free(data_args.copy);
+}
+
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+       TEST_FAIL("There was no UFFD fault expected.");
+       return -1;
+}
+
+/* Returns false if the test should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+                                       struct userspace_mem_region *region)
+{
+       void *hva = (void *)region->region.userspace_addr;
+       uint64_t paging_size = region->region.memory_size;
+       int ret, fd = region->fd;
+
+       if (fd != -1) {
+               ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                               0, paging_size);
+               TEST_ASSERT(ret == 0, "fallocate failed");
+       } else {
+               ret = madvise(hva, paging_size, MADV_DONTNEED);
+               TEST_ASSERT(ret == 0, "madvise failed");
+       }
+
+       return true;
+}
+
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+       struct userspace_mem_region *region;
+       void *hva;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       hva = (void *)region->region.userspace_addr;
+
+       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+       memcpy(hva, run->mmio.data, run->mmio.len);
+       events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+       uint64_t data;
+
+       memcpy(&data, run->mmio.data, sizeof(data));
+       pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+                run->mmio.phys_addr, run->mmio.len,
+                run->mmio.is_write, data);
+       TEST_FAIL("There was no MMIO exit expected.");
+}
+
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+                                    struct userspace_mem_region *region,
+                                    uint64_t host_pg_nr)
+{
+       unsigned long *bmap;
+       bool first_page_dirty;
+       uint64_t size = region->region.memory_size;
+
+       /* getpagesize() is not always equal to vm->page_size */
+       bmap = bitmap_zalloc(size / getpagesize());
+       kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+       first_page_dirty = test_bit(host_pg_nr, bmap);
+       free(bmap);
+       return first_page_dirty;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+       struct userspace_mem_region *data_region, *pt_region;
+       bool continue_test = true;
+       uint64_t pte_gpa, pte_pg;
+
+       data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+       pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
+
+       if (cmd == CMD_SKIP_TEST)
+               continue_test = false;
+
+       if (cmd & CMD_HOLE_PT)
+               continue_test = punch_hole_in_backing_store(vm, pt_region);
+       if (cmd & CMD_HOLE_DATA)
+               continue_test = punch_hole_in_backing_store(vm, data_region);
+       if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+               TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+                           "Missing write in dirty log");
+       if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+               TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
+                           "Missing s1ptw write in dirty log");
+       if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+               TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+                           "Unexpected write in dirty log");
+       if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+               TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
+                           "Unexpected s1ptw write in dirty log");
+
+       return continue_test;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+       TEST_FAIL("Unexpected vcpu run failure");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+       TEST_ASSERT(errno == ENOSYS,
+                   "The mmio handler should have returned not implemented.");
+       events.fail_vcpu_runs += 1;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
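+/* Returns 0x77; copied into the test-data memslot by load_exec_code_for_test(). */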
+noinline void __return_0x77(void)
+{
+       asm volatile("__exec_test: mov x0, #0x77\n"
+                    "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+       uint64_t *code;
+       struct userspace_mem_region *region;
+       void *hva;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       hva = (void *)region->region.userspace_addr;
+
+       assert(TEST_EXEC_GVA > TEST_GVA);
+       code = hva + TEST_EXEC_GVA - TEST_GVA;
+       memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+                                struct test_desc *test)
+{
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_DABT_CUR, no_dabt_handler);
+       vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+                               ESR_ELx_EC_IABT_CUR, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *region;
+       uint64_t pte_gpa;
+
+       region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+       /* Map TEST_GVA first. This will install a new PTE. */
+       virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+       /* Then map TEST_PTE_GVA to the above PTE. */
+       pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+       virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+       CODE_AND_DATA_MEMSLOT,
+       PAGE_TABLE_MEMSLOT,
+       TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+       uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+       uint64_t guest_page_size = vm->page_size;
+       uint64_t max_gfn = vm_compute_max_gfn(vm);
+       /* Enough for 2M of code when using 4K guest pages. */
+       uint64_t code_npages = 512;
+       uint64_t pt_size, data_size, data_gpa;
+
+       /*
+        * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+        * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs.  That's 13
+        * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+        * twice that just in case.
+        */
+       pt_size = 26 * guest_page_size;
+
+       /* memslot sizes and gpa's must be aligned to the backing page size */
+       pt_size = align_up(pt_size, backing_src_pagesz);
+       data_size = align_up(guest_page_size, backing_src_pagesz);
+       data_gpa = (max_gfn * guest_page_size) - data_size;
+       data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+                                   CODE_AND_DATA_MEMSLOT, code_npages, 0);
+       vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+       vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+       vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+                                   PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+                                   p->test_desc->pt_memslot_flags);
+       vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+       vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+                                   data_size / guest_page_size,
+                                   p->test_desc->data_memslot_flags);
+       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+
+       ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
+}
+
+static void setup_default_handlers(struct test_desc *test)
+{
+       if (!test->mmio_handler)
+               test->mmio_handler = mmio_no_handler;
+
+       if (!test->fail_vcpu_run_handler)
+               test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+       struct test_desc *test = p->test_desc;
+
+       pr_debug("Test: %s\n", test->name);
+       pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+       pr_debug("Testing memory backing src type: %s\n",
+                vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+       memset(&events, 0, sizeof(events));
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+                         struct test_desc *test)
+{
+       struct kvm_run *run;
+       struct ucall uc;
+       int ret;
+
+       run = vcpu->run;
+
+       for (;;) {
+               ret = _vcpu_run(vcpu);
+               if (ret) {
+                       test->fail_vcpu_run_handler(ret);
+                       goto done;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       if (!handle_cmd(vm, uc.args[1])) {
+                               test->skip = true;
+                               goto done;
+                       }
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_NONE:
+                       if (run->exit_reason == KVM_EXIT_MMIO)
+                               test->mmio_handler(vm, run);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+       struct test_params *p = (struct test_params *)arg;
+       struct test_desc *test = p->test_desc;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       struct uffd_desc *pt_uffd, *data_uffd;
+
+       print_test_banner(mode, p);
+
+       vm = ____vm_create(VM_SHAPE(mode));
+       setup_memslots(vm, p);
+       kvm_vm_elf_load(vm, program_invocation_name);
+       setup_ucall(vm);
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+       setup_gva_maps(vm);
+
+       reset_event_counts();
+
+       /*
+        * Set some code in the data memslot for the guest to execute (only
+        * applicable to the EXEC tests). This has to be done before
+        * setup_uffd() as that function copies the memslot data for the uffd
+        * handler.
+        */
+       load_exec_code_for_test(vm);
+       setup_uffd(vm, p, &pt_uffd, &data_uffd);
+       setup_abort_handlers(vm, vcpu, test);
+       setup_default_handlers(test);
+       vcpu_args_set(vcpu, 1, test);
+
+       vcpu_run_loop(vm, vcpu, test);
+
+       kvm_vm_free(vm);
+       free_uffd(test, pt_uffd, data_uffd);
+
+       /*
+        * Make sure we check the events after the uffd threads have exited,
+        * which means they updated their respective event counters.
+        */
+       if (!test->skip)
+               check_event_counts(test);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-s mem-type]\n", name);
+       puts("");
+       guest_modes_help();
+       backing_src_help("-s");
+       puts("");
+}
+
+#define SNAME(s)                       #s
+#define SCAT2(a, b)                    SNAME(a ## _ ## b)
+#define SCAT3(a, b, c)                 SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d)              SCAT2(a, SCAT3(b, c, d))
+
+#define _CHECK(_test)                  _CHECK_##_test
+#define _PREPARE(_test)                        _PREPARE_##_test
+#define _PREPARE_guest_read64          NULL
+#define _PREPARE_guest_ld_preidx       NULL
+#define _PREPARE_guest_write64         NULL
+#define _PREPARE_guest_st_preidx       NULL
+#define _PREPARE_guest_exec            NULL
+#define _PREPARE_guest_at              NULL
+#define _PREPARE_guest_dc_zva          guest_check_dc_zva
+#define _PREPARE_guest_cas             guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af               guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af                 NULL
+#define _CHECK_with_af                 guest_check_pte_af
+#define _CHECK_no_af                   NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd)                              \
+{                                                                              \
+       .name                   = SCAT3(_access, _with_af, #_mark_cmd),         \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .mem_mark_cmd           = _mark_cmd,                                    \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _CHECK(_with_af) },                         \
+       .expected_events        = { 0 },                                        \
+}
+
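+/*
+ * Performs an access after the memory marking command (_mark_cmd) and expects
+ * _uffd_faults userfaultfd faults to be handled.
+ */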
+#define TEST_UFFD(_access, _with_af, _mark_cmd,                                        \
+                 _uffd_data_handler, _uffd_pt_handler, _uffd_faults)           \
+{                                                                              \
+       .name                   = SCAT4(uffd, _access, _with_af, #_mark_cmd),   \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mem_mark_cmd           = _mark_cmd,                                    \
+       .guest_test_check       = { _CHECK(_with_af) },                         \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = _uffd_pt_handler,                             \
+       .expected_events        = { .uffd_faults = _uffd_faults, },             \
+}
+
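+/* Performs an access while both memslots are tracked in the dirty log. */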
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check)              \
+{                                                                              \
+       .name                   = SCAT3(dirty_log, _access, _with_af),          \
+       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
+       .expected_events        = { 0 },                                        \
+}
+
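+/* Combines dirty logging on both memslots with uffd handling of punched holes. */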
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,         \
+                               _uffd_faults, _test_check, _pt_check)           \
+{                                                                              \
+       .name                   = SCAT3(uffd_and_dirty_log, _access, _with_af), \
+       .data_memslot_flags     = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .pt_memslot_flags       = KVM_MEM_LOG_DIRTY_PAGES,                      \
+       .guest_prepare          = { _PREPARE(_with_af),                         \
+                                   _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_test_check       = { _CHECK(_with_af), _test_check, _pt_check }, \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .expected_events        = { .uffd_faults = _uffd_faults, },             \
+}
+
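+/* Performs an access while both memslots are mapped read-only. */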
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)                   \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot, _access),                   \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits },                \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access)                                   \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1 },                      \
+}
+
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,     \
+                                     _test_check)                              \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot, _access),                   \
+       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _test_check },                              \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits},                 \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check)                \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syn_and_dlog, _access),   \
+       .data_memslot_flags     = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .pt_memslot_flags       = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,   \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .guest_test_check       = { _test_check },                              \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1 },                      \
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits,          \
+                                _uffd_data_handler, _uffd_faults)              \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_uffd, _access),              \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .mmio_handler           = _mmio_handler,                                \
+       .expected_events        = { .mmio_exits = _mmio_exits,                  \
+                                   .uffd_faults = _uffd_faults },              \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,      \
+                                            _uffd_faults)                      \
+{                                                                              \
+       .name                   = SCAT2(ro_memslot_no_syndrome, _access),       \
+       .data_memslot_flags     = KVM_MEM_READONLY,                             \
+       .pt_memslot_flags       = KVM_MEM_READONLY,                             \
+       .mem_mark_cmd           = CMD_HOLE_DATA | CMD_HOLE_PT,                  \
+       .guest_prepare          = { _PREPARE(_access) },                        \
+       .guest_test             = _access,                                      \
+       .uffd_data_handler      = _uffd_data_handler,                           \
+       .uffd_pt_handler        = uffd_pt_handler,                              \
+       .fail_vcpu_run_handler  = fail_vcpu_run_mmio_no_syndrome_handler,       \
+       .expected_events        = { .fail_vcpu_runs = 1,                        \
+                                   .uffd_faults = _uffd_faults },              \
+}
+
+static struct test_desc tests[] = {
+
+       /* Check that HW is setting the Access Flag (AF) (sanity checks). */
+       TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+       TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+       TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+       TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+       TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+       TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+       TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+       /*
+        * Punch a hole in the data backing store, and then try multiple
+        * accesses: reads should return zeroes, and writes should
+        * re-populate the page. The test also checks that no
+        * exception was generated in the guest.  Note that this
+        * reading/writing behavior is the same as reading/writing a
+        * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+        * userspace.
+        */
+       TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+       TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+       /*
+        * Punch holes in the data and PT backing stores and mark them for
+        * userfaultfd handling. This should result in 2 faults: the access
+        * on the data backing store, and its respective S1 page table walk
+        * (S1PTW).
+        */
+       TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       /*
+        * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+        * The S1PTW fault should still be marked as a write.
+        */
+       TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_no_handler, uffd_pt_handler, 1),
+       TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+       TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+                 uffd_data_handler, uffd_pt_handler, 2),
+
+       /*
+        * Try accesses when the data and PT memory regions are both
+        * tracked for dirty logging.
+        */
+       TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+                      guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+                      guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+                      guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+       TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+                      guest_check_s1ptw_wr_in_dirty_log),
+
+       /*
+        * Access when the data and PT memory regions are both marked for
+        * dirty logging and UFFD at the same time. The expected result is
+        * that writes should mark the dirty log and trigger a userfaultfd
+        * write fault.  Reads/execs should result in a read userfaultfd
+        * fault, and nothing in the dirty log.  Any S1PTW should result in
+        * a write in the dirty log and a userfaultfd write.
+        */
+       TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_no_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+                               uffd_data_handler,
+                               2, guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_no_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+                               uffd_data_handler,
+                               2, guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+                               uffd_data_handler,
+                               2, guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+                               uffd_data_handler, 2,
+                               guest_check_write_in_dirty_log,
+                               guest_check_s1ptw_wr_in_dirty_log),
+       /*
+        * Access when both the PT and data regions are marked read-only
+        * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+        * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+        * failed vcpu run, and reads/execs with and without syndromes do
+        * not fault.
+        */
+       TEST_RO_MEMSLOT(guest_read64, 0, 0),
+       TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+       TEST_RO_MEMSLOT(guest_at, 0, 0),
+       TEST_RO_MEMSLOT(guest_exec, 0, 0),
+       TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+       TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
+       /*
+        * The PT and data regions are both read-only and marked
+        * for dirty logging at the same time. The expected result is that
+        * writes do not show up in the dirty log. The read-only
+        * handling is the same as if the memslot was not marked
+        * for dirty logging: writes with a syndrome result in an MMIO
+        * exit, and writes with no syndrome result in a failed vcpu run.
+        */
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+                                     guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+                                     1, guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+                                                 guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+                                                 guest_check_no_write_in_dirty_log),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+                                                 guest_check_no_write_in_dirty_log),
+
+       /*
+        * The PT and data regions are both read-only, with holes punched in
+        * their backing stores and tracked by userfaultfd. The expected
+        * result is the
+        * union of both userfaultfd and read-only behaviors. For example,
+        * write accesses result in a userfaultfd write fault and an MMIO
+        * exit.  Writes with no syndrome result in a failed vcpu run and
+        * no userfaultfd write fault. Reads result in userfaultfd getting
+        * triggered.
+        */
+       TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+                                uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+       TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
+
+       { 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+       struct test_desc *t;
+
+       for (t = &tests[0]; t->name; t++) {
+               if (t->skip)
+                       continue;
+
+               struct test_params p = {
+                       .src_type = src_type,
+                       .test_desc = t,
+               };
+
+               for_each_guest_mode(run_test, &p);
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       enum vm_mem_backing_src_type src_type;
+       int opt;
+
+       src_type = DEFAULT_VM_MEM_SRC;
+
+       while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+               switch (opt) {
+               case 'm':
+                       guest_modes_cmdline(optarg);
+                       break;
+               case 's':
+                       src_type = parse_backing_src_type(optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       exit(0);
+               }
+       }
+
+       for_each_test_and_guest_mode(src_type);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
new file mode 100644 (file)
index 0000000..ab491ee
--- /dev/null
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_test - Tests relating to KVM's PSCI implementation.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This test includes:
+ *  - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ *    and userspace reading the targeted vCPU's registers.
+ *  - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ *    KVM_SYSTEM_EVENT_SUSPEND UAPI.
+ */
+
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <asm/cputype.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
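+/* Arbitrary values the target vCPU should observe in PC/x0 after CPU_ON */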
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+                           uint64_t context_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+                 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+                                  uint64_t lowest_affinity_level)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+                 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+                 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+       struct arm_smccc_res res;
+
+       smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+       return res.a0;
+}
+
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mp_state mp_state = {
+               .mp_state = KVM_MP_STATE_STOPPED,
+       };
+
+       vcpu_mp_state_set(vcpu, &mp_state);
+}
+
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+                              struct kvm_vcpu **target)
+{
+       struct kvm_vcpu_init init;
+       struct kvm_vm *vm;
+
+       vm = vm_create(2);
+
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+       *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+       *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+
+       return vm;
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+               REPORT_GUEST_ASSERT(uc);
+}
+
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+       uint64_t obs_pc, obs_x0;
+
+       obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+       obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
+
+       TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+                   "unexpected target cpu pc: %lx (expected: %lx)",
+                   obs_pc, CPU_ON_ENTRY_ADDR);
+       TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+                   "unexpected target context id: %lx (expected: %lx)",
+                   obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+       uint64_t target_state;
+
+       GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
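+       /* Poll AFFINITY_INFO until the target CPU reports as being on. */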
+       do {
+               target_state = psci_affinity_info(target_cpu, 0);
+
+               GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+                            (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+       } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+       GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+       struct kvm_vcpu *source, *target;
+       uint64_t target_mpidr;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = setup_vm(guest_test_cpu_on, &source, &target);
+
+       /* Make sure the target vCPU is already off when executing the test. */
+       vcpu_power_off(target);
+
+       target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+       vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+       enter_guest(source);
+
+       if (get_ucall(source, &uc) != UCALL_DONE)
+               TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+
+       assert_vcpu_reset(target);
+       kvm_vm_free(vm);
+}
+
+static void guest_test_system_suspend(void)
+{
+       uint64_t ret;
+
+       /* assert that SYSTEM_SUSPEND is discoverable */
+       GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+       GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+       ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+       GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+       struct kvm_vcpu *source, *target;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+
+       vm = setup_vm(guest_test_system_suspend, &source, &target);
+       vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
+
+       vcpu_power_off(target);
+       run = source->run;
+
+       enter_guest(source);
+
+       TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
+       TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+                   "Unhandled system event: %u (expected: %u)",
+                   run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_system_off2(void)
+{
+       uint64_t ret;
+
+       /* assert that SYSTEM_OFF2 is discoverable */
+       GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
+                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+       GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) &
+                    PSCI_1_3_OFF_TYPE_HIBERNATE_OFF);
+
+       /* With non-zero 'cookie' field, it should fail */
+       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1);
+       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+       /*
+        * This would normally never return, so KVM sets the return value
+        * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so
+        * that it can test both values for HIBERNATE_OFF.
+        */
+       ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0);
+       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+       /*
+        * Revision F.b of the PSCI v1.3 specification documents zero as an
+        * alias for HIBERNATE_OFF, since that's the value used in earlier
+        * revisions of the spec and some implementations in the field.
+        */
+       ret = psci_system_off2(0, 1);
+       GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS);
+
+       ret = psci_system_off2(0, 0);
+       GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE);
+
+       GUEST_DONE();
+}
+
+static void host_test_system_off2(void)
+{
+       struct kvm_vcpu *source, *target;
+       struct kvm_mp_state mps;
+       uint64_t psci_version = 0;
+       int nr_shutdowns = 0;
+       struct kvm_run *run;
+       struct ucall uc;
+
+       setup_vm(guest_test_system_off2, &source, &target);
+
+       psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
+
+       TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
+                   "Unexpected PSCI version %lu.%lu",
+                   PSCI_VERSION_MAJOR(psci_version),
+                   PSCI_VERSION_MINOR(psci_version));
+
+       vcpu_power_off(target);
+       run = source->run;
+
+       enter_guest(source);
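+       /*
+        * The guest issues two valid SYSTEM_OFF2 calls (HIBERNATE_OFF and its
+        * zero alias), so expect two shutdown exits before the final ucall.
+        */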
+       while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
+               TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN,
+                           "Unhandled system event: %u (expected: %u)",
+                           run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN);
+               TEST_ASSERT(run->system_event.ndata >= 1,
+                           "Unexpected amount of system event data: %u (expected, >= 1)",
+                           run->system_event.ndata);
+               TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2,
+                           "PSCI_OFF2 flag not set. Flags %llu (expected %llu)",
+                           run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+
+               nr_shutdowns++;
+
+               /* Restart the vCPU */
+               mps.mp_state = KVM_MP_STATE_RUNNABLE;
+               vcpu_mp_state_set(source, &mps);
+
+               enter_guest(source);
+       }
+
+       TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly");
+       TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns);
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
+
+       host_test_cpu_on();
+       host_test_system_suspend();
+       host_test_system_off2();
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
new file mode 100644 (file)
index 0000000..bc6cf50
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID registers from userspace.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+       FTR_EXACT,                      /* Use a predefined safe value */
+       FTR_LOWER_SAFE,                 /* Smaller value is safe */
+       FTR_HIGHER_SAFE,                /* Bigger value is safe */
+       FTR_HIGHER_OR_ZERO_SAFE,        /* Bigger value is safe, but 0 is biggest */
+       FTR_END,                        /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED     true    /* Value should be treated as signed */
+#define FTR_UNSIGNED   false   /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+       char *name;
+       bool sign;
+       enum ftr_type type;
+       uint8_t shift;
+       uint64_t mask;
+       /*
+        * For FTR_EXACT, safe_val is used as the exact safe value.
+        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+        */
+       int64_t safe_val;
+};
+
+struct test_feature_reg {
+       uint32_t reg;
+       const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL)      \
+       {                                                               \
+               .name = #NAME,                                          \
+               .sign = SIGNED,                                         \
+               .type = TYPE,                                           \
+               .shift = SHIFT,                                         \
+               .mask = MASK,                                           \
+               .safe_val = SAFE_VAL,                                   \
+       }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+       __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+                      reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+       __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+                      reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END                                    \
+       {                                               \
+               .type = FTR_END,                        \
+       }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+       REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+       REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+       REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+       REG_FTR_END,
+};
+
+#define TEST_REG(id, table)                    \
+       {                                       \
+               .reg = id,                      \
+               .ftr_bits = &((table)[0]),      \
+       }
+
+static struct test_feature_reg test_regs[] = {
+       TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+       TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+       TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+       TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+       TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+       TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+       TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
+       TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+       TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+       TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+       TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
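+/* Read the given ID register in the guest and report its value to the host. */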
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+       GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+       GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+       GUEST_REG_SYNC(SYS_CTR_EL0);
+
+       GUEST_DONE();
+}
+
+/* Return a safe value for a given ftr_bits and ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+       if (ftr_bits->sign == FTR_UNSIGNED) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = ftr_bits->safe_val;
+                       break;
+               case FTR_LOWER_SAFE:
+                       if (ftr > ftr_bits->safe_val)
+                               ftr--;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       if (ftr < ftr_max)
+                               ftr++;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == ftr_max)
+                               ftr = 0;
+                       else if (ftr != 0)
+                               ftr++;
+                       break;
+               default:
+                       break;
+               }
+       } else if (ftr != ftr_max) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = ftr_bits->safe_val;
+                       break;
+               case FTR_LOWER_SAFE:
+                       if (ftr > ftr_bits->safe_val)
+                               ftr--;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       if (ftr < ftr_max - 1)
+                               ftr++;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr != 0 && ftr != ftr_max - 1)
+                               ftr++;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return ftr;
+}
+
+/* Return an invalid value for a given ftr_bits and ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+       uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+       if (ftr_bits->sign == FTR_UNSIGNED) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+                       break;
+               case FTR_LOWER_SAFE:
+                       ftr++;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       ftr--;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == 0)
+                               ftr = ftr_max;
+                       else
+                               ftr--;
+                       break;
+               default:
+                       break;
+               }
+       } else if (ftr != ftr_max) {
+               switch (ftr_bits->type) {
+               case FTR_EXACT:
+                       ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+                       break;
+               case FTR_LOWER_SAFE:
+                       ftr++;
+                       break;
+               case FTR_HIGHER_SAFE:
+                       ftr--;
+                       break;
+               case FTR_HIGHER_OR_ZERO_SAFE:
+                       if (ftr == 0)
+                               ftr = ftr_max - 1;
+                       else
+                               ftr--;
+                       break;
+               default:
+                       break;
+               }
+       } else {
+               ftr = 0;
+       }
+
+       return ftr;
+}
+
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+                                    const struct reg_ftr_bits *ftr_bits)
+{
+       uint8_t shift = ftr_bits->shift;
+       uint64_t mask = ftr_bits->mask;
+       uint64_t val, new_val, ftr;
+
+       val = vcpu_get_reg(vcpu, reg);
+       ftr = (val & mask) >> shift;
+
+       ftr = get_safe_value(ftr_bits, ftr);
+
+       ftr <<= shift;
+       val &= ~mask;
+       val |= ftr;
+
+       vcpu_set_reg(vcpu, reg, val);
+       new_val = vcpu_get_reg(vcpu, reg);
+       TEST_ASSERT_EQ(new_val, val);
+
+       return new_val;
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+                             const struct reg_ftr_bits *ftr_bits)
+{
+       uint8_t shift = ftr_bits->shift;
+       uint64_t mask = ftr_bits->mask;
+       uint64_t val, old_val, ftr;
+       int r;
+
+       val = vcpu_get_reg(vcpu, reg);
+       ftr = (val & mask) >> shift;
+
+       ftr = get_invalid_value(ftr_bits, ftr);
+
+       old_val = val;
+       ftr <<= shift;
+       val &= ~mask;
+       val |= ftr;
+
+       r = __vcpu_set_reg(vcpu, reg, val);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+       val = vcpu_get_reg(vcpu, reg);
+       TEST_ASSERT_EQ(val, old_val);
+}
+
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
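+/* Map a sysreg encoding to its index in KVM's feature ID register range. */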
+#define encoding_to_range_idx(encoding)                                                        \
+       KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding),      \
+                                    sys_reg_CRn(encoding), sys_reg_CRm(encoding),      \
+                                    sys_reg_Op2(encoding))
+
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+       struct reg_mask_range range = {
+               .addr = (__u64)masks,
+       };
+       int ret;
+
+       /* KVM should return error when reserved field is not zero */
+       range.reserved[0] = 1;
+       ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+       TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+       /* Get writable masks for feature ID registers */
+       memset(range.reserved, 0, sizeof(range.reserved));
+       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+       for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+               const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+               uint32_t reg_id = test_regs[i].reg;
+               uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+               int idx;
+
+               /* Get the index to masks array for the idreg */
+               idx = encoding_to_range_idx(reg_id);
+
+               for (int j = 0;  ftr_bits[j].type != FTR_END; j++) {
+                       /* Skip aarch32 regs on an aarch64-only system, since they are RAZ/WI. */
+                       if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+                               ksft_test_result_skip("%s on AARCH64 only system\n",
+                                                     ftr_bits[j].name);
+                               continue;
+                       }
+
+                       /* Make sure the feature field is writable */
+                       TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+                       test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+
+                       test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+                                                                 &ftr_bits[j]);
+
+                       ksft_test_result_pass("%s\n", ftr_bits[j].name);
+               }
+       }
+}
+
+#define MPAM_IDREG_TEST        6
+static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
+{
+       uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+       struct reg_mask_range range = {
+               .addr = (__u64)masks,
+       };
+       uint64_t val;
+       int idx, err;
+
+       /*
+        * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero,
+        * check that it can still be set to 1 when the hardware supports
+        * MPAM, but that it cannot be set to any other value.
+        */
+
+       /* Get writable masks for feature ID registers */
+       memset(range.reserved, 0, sizeof(range.reserved));
+       vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+       /* Writeable? Nothing to test! */
+       idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1);
+       if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) {
+               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n");
+               return;
+       }
+
+       /* Get the id register value */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+       /* Try to set MPAM=0. This should always be possible. */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n");
+
+       /* Try to set MPAM=1 */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n");
+
+       /* Try to set MPAM=2 */
+       val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+       val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val);
+       if (err)
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n");
+       else
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n");
+
+       /* And again for ID_AA64PFR1_EL1.MPAM_frac */
+       idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+       if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) {
+               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n");
+               return;
+       }
+
+       /* Get the id register value */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+       /* Try to set MPAM_frac=0. This should always be possible. */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n");
+
+       /* Try to set MPAM_frac=1 */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n");
+       else
+               ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n");
+
+       /* Try to set MPAM_frac=2 */
+       val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+       val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2);
+       err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+       if (err)
+               ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n");
+       else
+               ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+       bool done = false;
+       struct ucall uc;
+
+       while (!done) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_SYNC:
+                       /* Make sure the written values are seen by guest */
+                       TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+                                      uc.args[3]);
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       }
+}
+
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level)       (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level)                (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level)      \
+       (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+       uint64_t clidr;
+       int level;
+
+       clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
+
+       /* find the first empty level in the cache hierarchy */
+       for (level = 1; level < 7; level++) {
+               if (!CLIDR_CTYPE(clidr, level))
+                       break;
+       }
+
+       /*
+        * If you have a mind-boggling 7 levels of cache, congratulations, you
+        * get to fix this.
+        */
+       TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+       /* stick in a unified cache level */
+       clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+       test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_ctr(struct kvm_vcpu *vcpu)
+{
+       u64 ctr;
+
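+       /*
+        * Build a more conservative CTR_EL0 value than the host's: clear DIC
+        * and, when possible, shrink IminLine before writing the value back.
+        */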
+       ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
+       ctr &= ~CTR_EL0_DIC_MASK;
+       if (ctr & CTR_EL0_IminLine_MASK)
+               ctr--;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
+       test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+       u64 val;
+
+       test_clidr(vcpu);
+       test_ctr(vcpu);
+
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
+       val++;
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+
+       test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
+       ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+       size_t idx = encoding_to_range_idx(encoding);
+       uint64_t observed;
+
+       observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
+       TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+        * architectural reset of the vCPU.
+        */
+       aarch64_vcpu_setup(vcpu, NULL);
+
+       for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+               test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+       test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
+       test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+       test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+
+       ksft_test_result_pass("%s\n", __func__);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       bool aarch64_only;
+       uint64_t val, el0;
+       int test_cnt;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Check for AARCH64 only system */
+       val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+       el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+       aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+       ksft_print_header();
+
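+       /*
+        * Each ftr_id_* table ends with a REG_FTR_END sentinel, hence the
+        * subtraction of ARRAY_SIZE(test_regs); the "+ 2" accounts for
+        * test_vcpu_ftr_id_regs() and test_reset_preserves_id_regs(), and
+        * MPAM_IDREG_TEST for the results of test_user_set_mpam_reg().
+        */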
+       test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+                  ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
+                  ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
+                  ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+                  MPAM_IDREG_TEST;
+
+       ksft_set_plan(test_cnt);
+
+       test_vm_ftr_id_regs(vcpu, aarch64_only);
+       test_vcpu_ftr_id_regs(vcpu);
+       test_user_set_mpam_reg(vcpu);
+
+       test_guest_reg_read(vcpu);
+
+       test_reset_preserves_id_regs(vcpu);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
new file mode 100644 (file)
index 0000000..2d189f3
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ *  - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ *    is prevented from filtering the architecture range of SMCCC calls.
+ *  - Tests that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+       HVC_INSN,
+       SMC_INSN,
+};
+
+#define for_each_conduit(conduit)                                      \
+       for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+       struct arm_smccc_res res;
+
+       if (conduit == SMC_INSN)
+               smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+       else
+               smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+       GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+                             enum kvm_smccc_filter_action action)
+{
+       struct kvm_smccc_filter filter = {
+               .base           = start,
+               .nr_functions   = nr_functions,
+               .action         = action,
+       };
+
+       return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+                                    KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+                            enum kvm_smccc_filter_action action)
+{
+       int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+       TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vcpu_init init;
+       struct kvm_vm *vm;
+
+       vm = vm_create(1);
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+       /*
+        * Enable in-kernel emulation of PSCI to ensure that calls are denied
+        * due to the SMCCC filter, not because of KVM.
+        */
+       init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+       *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+       return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       struct kvm_smccc_filter filter = {
+               .base           = PSCI_0_2_FN_PSCI_VERSION,
+               .nr_functions   = 1,
+               .action         = KVM_SMCCC_FILTER_DENY,
+               .pad            = { -1 },
+       };
+       int r;
+
+       r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+                                 KVM_ARM_VM_SMCCC_FILTER, &filter);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Setting filter with nonzero padding should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       uint32_t smc64_fn;
+       int r;
+
+       r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+                              1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter reserved range should return EEXIST");
+
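+       /* Also try the SMC64 view of the Arm Architecture Call range. */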
+       smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+                                     0, 0);
+
+       r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter reserved range should return EEXIST");
+
+       kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to filter 0 functions should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to overflow filter range should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+       TEST_ASSERT(r < 0 && errno == EINVAL,
+                   "Attempt to use reserved filter action should return EINVAL");
+
+       kvm_vm_free(vm);
+}
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm = setup_vm(&vcpu);
+       int r;
+
+       set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+       r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+       TEST_ASSERT(r < 0 && errno == EEXIST,
+                   "Attempt to filter already configured range should return EEXIST");
+
+       kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+
+       TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+                   "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+       enum smccc_conduit conduit;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       for_each_conduit(conduit) {
+               vm = setup_vm(&vcpu);
+
+               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               vcpu_run(vcpu);
+               expect_call_denied(vcpu);
+
+               kvm_vm_free(vm);
+       }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+                                   enum smccc_conduit conduit)
+{
+       struct kvm_run *run = vcpu->run;
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+                   "Unexpected exit reason: %u", run->exit_reason);
+       TEST_ASSERT(run->hypercall.nr == func_id,
+                   "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+       if (conduit == SMC_INSN)
+               TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+                           "KVM_HYPERCALL_EXIT_SMC is not set");
+       else
+               TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+                           "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+       enum smccc_conduit conduit;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       for_each_conduit(conduit) {
+               vm = setup_vm(&vcpu);
+
+               set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+               vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               vcpu_run(vcpu);
+               expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+               kvm_vm_free(vm);
+       }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       int r;
+
+       r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+       kvm_vm_free(vm);
+       return !r;
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_supports_smccc_filter());
+
+       test_pad_must_be_zero();
+       test_invalid_nr_functions();
+       test_overflow_nr_functions();
+       test_reserved_action();
+       test_filter_reserved_range();
+       test_filter_overlap();
+       test_filter_denied();
+       test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/arm64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
new file mode 100644 (file)
index 0000000..80b74c6
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This test ensures that non-mixed-width vCPUs (all 64bit vCPUs or all
+ * 32bit vCPUs) can be configured, and that mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+                          struct kvm_vcpu_init *init1)
+{
+       struct kvm_vcpu *vcpu0, *vcpu1;
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       vcpu0 = __vm_vcpu_add(vm, 0);
+       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+       if (ret)
+               goto free_exit;
+
+       vcpu1 = __vm_vcpu_add(vm, 1);
+       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+                                 struct kvm_vcpu_init *init1)
+{
+       struct kvm_vcpu *vcpu0, *vcpu1;
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       vcpu0 = __vm_vcpu_add(vm, 0);
+       vcpu1 = __vm_vcpu_add(vm, 1);
+
+       ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+       if (ret)
+               goto free_exit;
+
+       ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+       kvm_vm_free(vm);
+       return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, that two 32bit vCPUs can be
+ * configured, and that two mixed-width vCPUs cannot be configured.
+ * For each of those three cases, the vCPUs are configured in two different
+ * orders.  One is running KVM_CREATE_VCPU for both vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.  The other is running KVM_CREATE_VCPU and
+ * KVM_ARM_VCPU_INIT for one vCPU, and then running those commands for the
+ * other vCPU.
+ */
+int main(void)
+{
+       struct kvm_vcpu_init init0, init1;
+       struct kvm_vm *vm;
+       int ret;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
+
+       /* Get the preferred target type and copy that to init1 for later use */
+       vm = vm_create_barebones();
+       vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
+       kvm_vm_free(vm);
+       init1 = init0;
+
+       /* Test with 64bit vCPUs */
+       ret = add_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with 32bit vCPUs */
+       init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init0);
+       TEST_ASSERT(ret == 0,
+                   "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+       /* Test with mixed-width vCPUs  */
+       init0.features[0] = 0;
+       init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+       ret = add_init_2vcpus(&init0, &init1);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+       ret = add_2vcpus_init_2vcpus(&init0, &init1);
+       TEST_ASSERT(ret != 0,
+                   "Configuring mixed-width vCPUs worked unexpectedly");
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
new file mode 100644 (file)
index 0000000..b3b5fb0
--- /dev/null
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic init sequence tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS               4
+
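+/* Encode the target vCPU in the upper 32 bits of the attr, the register offset in the lower 32. */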
+#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
+
+#define GICR_TYPER 0x8
+
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
+struct vm_gic {
+       struct kvm_vm *vm;
+       int gic_fd;
+       uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+                                   int want, const char *msg)
+{
+       uint32_t ignored_val;
+       int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                                       REG_OFFSET(vcpu, offset), &ignored_val);
+
+       TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+                             const char *msg)
+{
+       uint32_t val;
+
+       kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+                           REG_OFFSET(vcpu, offset), &val);
+       TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
+}
+
+/* dummy guest code */
+static void guest_code(void)
+{
+       GUEST_SYNC(0);
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+       GUEST_DONE();
+}
+
+/* Run a vCPU without asserting on failure so callers can check the return value. */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+       return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+                                             uint32_t nr_vcpus,
+                                             struct kvm_vcpu *vcpus[])
+{
+       struct vm_gic v;
+
+       v.gic_dev_type = gic_dev_type;
+       v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       return v;
+}
+
+static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
+{
+       struct vm_gic v;
+
+       v.gic_dev_type = gic_dev_type;
+       v.vm = vm_create_barebones();
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       return v;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+       close(v->gic_fd);
+       kvm_vm_free(v->vm);
+}
+
+struct vgic_region_attr {
+       uint64_t attr;
+       uint64_t size;
+       uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+       .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+       .size = 0x10000,
+       .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+       .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+       .size = NR_VCPUS * 0x20000,
+       .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+       .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+       .size = 0x1000,
+       .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+       .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+       .size = 0x2000,
+       .alignment = 0x1000,
+};
+
+/*
+ * Helper routine that performs general KVM device tests. The ARM_VGIC
+ * (GICv2 or GICv3) device ends up created with overlapping DIST/REDIST
+ * (or DIST/CPUIF for GICv2) regions. The assumption is that 4 vCPUs will be
+ * used, hence the overlap. In the GICv3 case, a REDIST region is set at 0x0
+ * and a DIST region at 0x70000. The GICv2 case sets a CPUIF at 0x0 and a
+ * DIST region at 0x1000.
+ */
+static void subtest_dist_rdist(struct vm_gic *v)
+{
+       int ret;
+       uint64_t addr;
+       struct vgic_region_attr rdist; /* CPU interface in GICv2 */
+       struct vgic_region_attr dist;
+
+       rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+                                               : gic_v2_cpu_region;
+       dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+                                               : gic_v2_dist_region;
+
+       /* Check existing group/attributes */
+       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
+
+       kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
+
+       /* check non existing attribute */
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+       TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
+
+       /* misaligned DIST and REDIST address settings */
+       addr = dist.alignment / 0x10;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   dist.attr, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+
+       addr = rdist.alignment / 0x10;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
+
+       /* out of range address */
+       addr = max_phys_size;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   dist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+       /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+       addr = max_phys_size - dist.alignment;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                       "half of the redist is beyond IPA limit");
+
+       /* set REDIST base address @0x0*/
+       addr = 0x00000;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           rdist.attr, &addr);
+
+       /* Attempt to create a second legacy redistributor region */
+       addr = 0xE0000;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   rdist.attr, &addr);
+       TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
+
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                    KVM_VGIC_V3_ADDR_TYPE_REDIST);
+       if (!ret) {
+               /* Attempt to mix legacy and new redistributor regions */
+               addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+               ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+               TEST_ASSERT(ret && errno == EINVAL,
+                           "attempt to mix GICv3 REDIST and REDIST_REGION");
+       }
+
+       /*
+        * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
+        * on first vcpu run instead.
+        */
+       addr = rdist.size - rdist.alignment;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           dist.attr, &addr);
+}
+
+/* Test the new REDIST region API */
+static void subtest_v3_redist_regions(struct vm_gic *v)
+{
+       uint64_t addr, expected_addr;
+       int ret;
+
+       ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST);
+       TEST_ASSERT(!ret, "Multiple redist regions advertised");
+
+       addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count == 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "attempt to register the first rdist region with index != 0");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "register an rdist region overlapping with another one");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                   "register redist region with base address beyond IPA range");
+
+       /* The last redist is above the pa range. */
+       addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+                   "register redist region with top address beyond IPA range");
+
+       addr = 0x260000;
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
+
+       /*
+        * Now there are 2 redist regions:
+        * region 0 @ 0x200000 2 redists
+        * region 1 @ 0x240000 1 redist
+        * Attempt to read their characteristics
+        */
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
+       expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
+       expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
+
+       addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
+       ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
+
+       addr = 0x260000;
+       kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
+       ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
+}
+
+/*
+ * The VGIC KVM device is created and initialized before the secondary vCPUs
+ * get created.
+ */
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret, i;
+
+       v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
+
+       subtest_dist_rdist(&v);
+
+       /* Add the rest of the VCPUs */
+       for (i = 1; i < NR_VCPUS; ++i)
+               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+/* All the VCPUs are created before the VGIC KVM device gets initialized */
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret;
+
+       v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
+
+       subtest_dist_rdist(&v);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+#define KVM_VGIC_V2_ATTR(offset, cpu) \
+       (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \
+        FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu))
+
+#define GIC_CPU_CTRL   0x00
+
+static void test_v2_uaccess_cpuif_no_vcpus(void)
+{
+       struct vm_gic v;
+       u64 val = 0;
+       int ret;
+
+       v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2);
+       subtest_dist_rdist(&v);
+
+       ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0));
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+       ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                                   KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val);
+       TEST_ASSERT(ret && errno == EINVAL,
+                   "accessed non-existent CPU interface, want errno: %i",
+                   EINVAL);
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_new_redist_regions(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       void *dummy = NULL;
+       struct vm_gic v;
+       uint64_t addr;
+       int ret;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
+       vm_gic_destroy(&v);
+
+       /* step2 */
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
+
+       vm_gic_destroy(&v);
+
+       /* step 3 */
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       subtest_v3_redist_regions(&v);
+
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
+       TEST_ASSERT(ret && errno == EFAULT,
+                   "register a third region (covering the 4 vcpus) with a bad userspace address");
+
+       addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       ret = run_vcpu(vcpus[3]);
+       TEST_ASSERT(!ret, "vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_typer_accesses(void)
+{
+       struct vm_gic v;
+       uint64_t addr;
+       int ret, i;
+
+       v.vm = vm_create(NR_VCPUS);
+       (void)vm_vcpu_add(v.vm, 0, guest_code);
+
+       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+       (void)vm_vcpu_add(v.vm, 3, guest_code);
+
+       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+                               "attempting to read GICR_TYPER of non created vcpu");
+
+       (void)vm_vcpu_add(v.vm, 1, guest_code);
+
+       v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+                               "read GICR_TYPER before GIC initialized");
+
+       (void)vm_vcpu_add(v.vm, 2, guest_code);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       for (i = 0; i < NR_VCPUS ; i++) {
+               v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+                                 "read GICR_TYPER before rdist region setting");
+       }
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       /* The first 2 rdists should be put there (vcpus 0 and 3) */
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
+
+       addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
+       ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+       TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
+
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+                         "no redist region attached to vcpu #1 yet, last cannot be returned");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+                         "no redist region attached to vcpu #2, last cannot be returned");
+
+       addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+                         "read typer of rdist #1, last properly returned");
+
+       vm_gic_destroy(&v);
+}
+
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+                                                  uint32_t vcpuids[])
+{
+       struct vm_gic v;
+       int i;
+
+       v.vm = vm_create(nr_vcpus);
+       for (i = 0; i < nr_vcpus; i++)
+               vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+       v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+       return v;
+}
+
+/*
+ * Test GICR_TYPER last bit with new redist regions
+ * rdist regions #1 and #2 are contiguous
+ * rdist region #0 @0x100000 2 rdist capacity
+ *     rdists: 0, 3 (Last)
+ * rdist region #1 @0x240000 2 rdist capacity
+ *     rdists: 5, 4 (Last)
+ * rdist region #2 @0x200000 2 rdist capacity
+ *     rdists: 1, 2
+ */
+static void test_v3_last_bit_redist_regions(void)
+{
+       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+       struct vm_gic v;
+       uint64_t addr;
+
+       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+       addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
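+       /* GICR_TYPER: Processor_Number lives in bits [23:8], bit 4 is the "Last" bit. */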
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+       v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
+
+       vm_gic_destroy(&v);
+}
+
+/* Test last bit with legacy region */
+static void test_v3_last_bit_single_rdist(void)
+{
+       uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+       struct vm_gic v;
+       uint64_t addr;
+
+       v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       addr = 0x10000;
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+       v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+       v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #3");
+       v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+       v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+       v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #2");
+
+       vm_gic_destroy(&v);
+}
+
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       int ret, i;
+       uint64_t addr;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
+
+       /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+       addr = max_phys_size - (3 * 2 * 0x10000);
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+       addr = 0x00000;
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+       /* Add the rest of the VCPUs */
+       for (i = 1; i < NR_VCPUS; ++i)
+               vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+       kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       /* Attempt to run a vcpu without enough redist space. */
+       ret = run_vcpu(vcpus[2]);
+       TEST_ASSERT(ret && errno == EINVAL,
+               "redist base+size above PA range detected on 1st vcpu run");
+
+       vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       uint64_t addr;
+       int its_fd, ret;
+
+       v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+       its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+
+       addr = 0x401000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == EINVAL,
+               "ITS region with misaligned address");
+
+       addr = max_phys_size;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+               "register ITS region with base address beyond IPA range");
+
+       addr = max_phys_size - 0x10000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == E2BIG,
+               "Half of ITS region is beyond IPA range");
+
+       /* This one succeeds setting the ITS base */
+       addr = 0x400000;
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_ITS_ADDR_TYPE, &addr);
+
+       addr = 0x300000;
+       ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                                   KVM_VGIC_ITS_ADDR_TYPE, &addr);
+       TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+       close(its_fd);
+       vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create a GIC device of the given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       struct vm_gic v;
+       uint32_t other;
+       int ret;
+
+       v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+       /* try to create a non existing KVM device */
+       ret = __kvm_test_create_device(v.vm, 0);
+       TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+       /* trial mode */
+       ret = __kvm_test_create_device(v.vm, gic_dev_type);
+       if (ret)
+               return ret;
+       v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+       ret = __kvm_create_device(v.vm, gic_dev_type);
+       TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+       /* try to create the other gic_dev_type */
+       other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+                                            : KVM_DEV_TYPE_ARM_VGIC_V2;
+
+       if (!__kvm_test_create_device(v.vm, other)) {
+               ret = __kvm_create_device(v.vm, other);
+               TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
+                               "create GIC device while other version exists");
+       }
+
+       vm_gic_destroy(&v);
+
+       return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+       test_vcpus_then_vgic(gic_dev_type);
+       test_vgic_then_vcpus(gic_dev_type);
+
+       if (VGIC_DEV_IS_V2(gic_dev_type))
+               test_v2_uaccess_cpuif_no_vcpus();
+
+       if (VGIC_DEV_IS_V3(gic_dev_type)) {
+               test_v3_new_redist_regions();
+               test_v3_typer_accesses();
+               test_v3_last_bit_redist_regions();
+               test_v3_last_bit_single_rdist();
+               test_v3_redist_ipa_range_check_at_vcpu_run();
+               test_v3_its_region();
+       }
+}
+
+int main(int ac, char **av)
+{
+       int ret;
+       int pa_bits;
+       int cnt_impl = 0;
+
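+       /* Derive the IPA space size used by the E2BIG address range checks. */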
+       pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+       max_phys_size = 1ULL << pa_bits;
+
+       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+       if (!ret) {
+               pr_info("Running GIC_v3 tests.\n");
+               run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+               cnt_impl++;
+       }
+
+       ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+       if (!ret) {
+               pr_info("Running GIC_v2 tests.\n");
+               run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+               cnt_impl++;
+       }
+
+       if (!cnt_impl) {
+               print_skip("No GICv2 nor GICv3 support");
+               exit(KSFT_SKIP);
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
new file mode 100644 (file)
index 0000000..f4ac28d
--- /dev/null
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+       uint32_t nr_irqs; /* number of KVM supported IRQs. */
+       bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+       bool level_sensitive; /* 1 is level, 0 is edge */
+       int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+       bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS          32
+#define KVM_PRIO_SHIFT         3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS         (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO            (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK          (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO       (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG   (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+       KVM_INJECT_EDGE_IRQ_LINE = 1,
+       KVM_SET_IRQ_LINE,
+       KVM_SET_IRQ_LINE_HIGH,
+       KVM_SET_LEVEL_INFO_HIGH,
+       KVM_INJECT_IRQFD,
+       KVM_WRITE_ISPENDR,
+       KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+       kvm_inject_cmd cmd;
+       uint32_t first_intid;
+       uint32_t num;
+       int level;
+       bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+               uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+               struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure)                     \
+       kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num)                                      \
+       _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure)                                        \
+       _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid)                                                 \
+       _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid)                                               \
+       kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+       kvm_inject_cmd cmd;
+       /* can inject SGIs, PPIs, and/or SPIs. */
+       bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_INJECT_EDGE_IRQ_LINE,             false, false, true },
+       { KVM_INJECT_IRQFD,                     false, false, true },
+       { KVM_WRITE_ISPENDR,                    true,  false, true },
+       { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_SET_IRQ_LINE_HIGH,                false, true,  true },
+       { KVM_SET_LEVEL_INFO_HIGH,              false, true,  true },
+       { KVM_INJECT_IRQFD,                     false, false, true },
+       { KVM_WRITE_ISPENDR,                    false, true,  true },
+       { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+       /*                                      sgi    ppi    spi */
+       { KVM_WRITE_ISACTIVER,                  true,  true,  true },
+       { 0, },
+};
+
+#define for_each_inject_fn(t, f)                                               \
+       for ((f) = (t); (f)->cmd; (f)++)
+
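+/* Same as for_each_inject_fn(), but skips KVM_INJECT_IRQFD if KVM lacks irqfd support. */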
+#define for_each_supported_inject_fn(args, t, f)                               \
+       for_each_inject_fn(t, f)                                                \
+               if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f)                             \
+       for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+       int i;
+
+       irq_handled = 0;
+       for (i = 0; i <= MAX_SPI; i++)
+               irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+       uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+
+       dsb(sy);
+       return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+       write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
+       isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+       uint32_t intid = gic_get_and_ack_irq();
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       GUEST_ASSERT(gic_irq_get_active(intid));
+
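+       /*
+        * An edge IRQ should no longer be pending once acked; a level IRQ
+        * stays asserted until the host lowers the line.
+        */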
+       if (!level_sensitive)
+               GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+       if (level_sensitive)
+               guest_set_irq_line(intid, 0);
+
+       GUEST_ASSERT(intid < MAX_SPI);
+       irqnr_received[intid] += 1;
+       irq_handled += 1;
+
+       gic_set_eoi(intid);
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       if (eoi_split)
+               gic_set_dir(intid);
+
+       GUEST_ASSERT(!gic_irq_get_active(intid));
+       GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+               uint32_t num, int level, bool expect_failure)
+{
+       struct kvm_inject_args args = {
+               .cmd = cmd,
+               .first_intid = first_intid,
+               .num = num,
+               .level = level,
+               .expect_failure = expect_failure,
+       };
+       GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY()                                               \
+do {                                                                           \
+       uint32_t _intid;                                                        \
+       _intid = gic_get_and_ack_irq();                                         \
+       GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS);                    \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev)                                 \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs)                 \
+{                                                                              \
+       guest_irq_generic_handler(split, lev);                                  \
+}
+
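+/* Instantiate a handler for each (eoi_split, level_sensitive) combination. */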
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+       {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+       {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+       int i;
+
+       for (i = 0; i < args->nr_irqs; i++)
+               gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+       kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+               uint32_t intid, kvm_inject_cmd cmd)
+{
+       reset_stats();
+
+       _KVM_INJECT(cmd, intid, true);
+       /* no IRQ to handle on entry */
+
+       GUEST_ASSERT_EQ(irq_handled, 0);
+       GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+               uint32_t first_intid, uint32_t num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t i;
+
+       reset_stats();
+
+       /* Cycle over all priorities to make things more interesting. */
+       for (i = first_intid; i < num + first_intid; i++)
+               gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
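+       /* Mask IRQs; each injected interrupt is then taken via the wfi loop below. */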
+       asm volatile("msr daifset, #2" : : : "memory");
+       KVM_INJECT_MULTI(cmd, first_intid, num);
+
+       while (irq_handled < num) {
+               wfi();
+               local_irq_enable();
+               isb(); /* handle IRQ */
+               local_irq_disable();
+       }
+       local_irq_enable();
+
+       GUEST_ASSERT_EQ(irq_handled, num);
+       for (i = first_intid; i < num + first_intid; i++)
+               GUEST_ASSERT_EQ(irqnr_received[i], 1);
+       GUEST_ASSERT_IAR_EMPTY();
+
+       reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (num IRQs starting at
+ * first_intid).  This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+               uint32_t first_intid, uint32_t num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t prio, intid, ap1r;
+       int i;
+
+       /*
+        * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+        * in descending order, so intid+1 can preempt intid.
+        */
+       for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+               GUEST_ASSERT(prio >= 0);
+               intid = i + first_intid;
+               gic_set_priority(intid, prio);
+       }
+
+       /*
+        * In a real migration, KVM would restore all GIC state before running
+        * guest code.
+        */
+       for (i = 0; i < num; i++) {
+               intid = i + first_intid;
+               KVM_ACTIVATE(cmd, intid);
+               ap1r = gic_read_ap1r0();
+               ap1r |= 1U << i;
+               gic_write_ap1r0(ap1r);
+       }
+
+       /* This is where the "migration" would occur. */
+
+       /* finish handling the IRQs starting with the highest priority one. */
+       for (i = 0; i < num; i++) {
+               intid = num - i - 1 + first_intid;
+               gic_set_eoi(intid);
+               if (args->eoi_split)
+                       gic_set_dir(intid);
+       }
+
+       for (i = 0; i < num; i++)
+               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+       uint32_t intid;
+
+       do {
+               asm volatile("wfi" : : : "memory");
+               intid = gic_get_and_ack_irq();
+       } while (intid == IAR_SPURIOUS);
+
+       return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions.  This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+               uint32_t first_intid, int num,
+               kvm_inject_cmd cmd)
+{
+       uint32_t intid, prio, step = KVM_PRIO_STEPS;
+       int i;
+
+       /*
+        * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+        * in descending order, so intid+1 can preempt intid.
+        */
+       for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+               GUEST_ASSERT(prio >= 0);
+               intid = i + first_intid;
+               gic_set_priority(intid, prio);
+       }
+
+       local_irq_disable();
+
+       for (i = 0; i < num; i++) {
+               uint32_t tmp;
+               intid = i + first_intid;
+               KVM_INJECT(cmd, intid);
+               /* Each successive IRQ will preempt the previous one. */
+               tmp = wait_for_and_activate_irq();
+               GUEST_ASSERT_EQ(tmp, intid);
+               if (args->level_sensitive)
+                       guest_set_irq_line(intid, 0);
+       }
+
+       /* finish handling the IRQs starting with the highest priority one. */
+       for (i = 0; i < num; i++) {
+               intid = num - i - 1 + first_intid;
+               gic_set_eoi(intid);
+               if (args->eoi_split)
+                       gic_set_dir(intid);
+       }
+
+       local_irq_enable();
+
+       for (i = 0; i < num; i++)
+               GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+       GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+       GUEST_ASSERT_IAR_EMPTY();
+
+       reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+       uint32_t nr_irqs = args->nr_irqs;
+
+       if (f->sgi) {
+               guest_inject(args, MIN_SGI, 1, f->cmd);
+               guest_inject(args, 0, 16, f->cmd);
+       }
+
+       if (f->ppi)
+               guest_inject(args, MIN_PPI, 1, f->cmd);
+
+       if (f->spi) {
+               guest_inject(args, MIN_SPI, 1, f->cmd);
+               guest_inject(args, nr_irqs - 1, 1, f->cmd);
+               guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+       }
+}
+
+static void test_injection_failure(struct test_args *args,
+               struct kvm_inject_desc *f)
+{
+       uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+               test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+       /*
+        * Test up to 4 levels of preemption. The reason is that KVM doesn't
+        * currently implement the ability to have more than the number-of-LRs
+        * number of concurrently active IRQs. The number of LRs implemented is
+        * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
+        */
+       if (f->sgi)
+               test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+       if (f->ppi)
+               test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+       if (f->spi)
+               test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+       /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+       if (f->sgi)
+               guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+       if (f->ppi)
+               guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+       if (f->spi)
+               guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args *args)
+{
+       uint32_t i, nr_irqs = args->nr_irqs;
+       bool level_sensitive = args->level_sensitive;
+       struct kvm_inject_desc *f, *inject_fns;
+
+       gic_init(GIC_V3, 1);
+
+       for (i = 0; i < nr_irqs; i++)
+               gic_irq_enable(i);
+
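+       /* Configure all SPIs as edge- or level-triggered, per the test args. */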
+       for (i = MIN_SPI; i < nr_irqs; i++)
+               gic_irq_set_config(i, !level_sensitive);
+
+       gic_set_eoi_split(args->eoi_split);
+
+       reset_priorities(args);
+       gic_set_priority_mask(CPU_PRIO_MASK);
+
+       inject_fns  = level_sensitive ? inject_level_fns
+                                     : inject_edge_fns;
+
+       local_irq_enable();
+
+       /* Start the tests. */
+       for_each_supported_inject_fn(args, inject_fns, f) {
+               test_injection(args, f);
+               test_preemption(args, f);
+               test_injection_failure(args, f);
+       }
+
+       /*
+        * Restore the active state of IRQs. This would happen when live
+        * migrating IRQs in the middle of being handled.
+        */
+       for_each_supported_activate_fn(args, set_active_fns, f)
+               test_restore_active(args, f);
+
+       GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+                       struct test_args *test_args, bool expect_failure)
+{
+       int ret;
+
+       if (!expect_failure) {
+               kvm_arm_irq_line(vm, intid, level);
+       } else {
+               /* The interface doesn't allow larger intids. */
+               if (intid > KVM_ARM_IRQ_NUM_MASK)
+                       return;
+
+               ret = _kvm_arm_irq_line(vm, intid, level);
+               TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %i did not cause KVM_IRQ_LINE "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+       }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+                       bool expect_failure)
+{
+       if (!expect_failure) {
+               kvm_irq_set_level_info(gic_fd, intid, level);
+       } else {
+               int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+               /*
+                * The kernel silently fails for invalid SPIs and SGIs (which
+                * are not level-sensitive). It only checks that the intid
+                * doesn't exceed 1U << 10 (the max reserved SPI); callers are
+                * expected to mask the intid with 0x3ff (1023).
+                */
+               if (intid > VGIC_MAX_RESERVED)
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+               else
+                       TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+                               "for intid %i failed, rc: %i errno: %i",
+                               intid, ret, errno);
+       }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+               bool expect_failure)
+{
+       struct kvm_irq_routing *routing;
+       int ret;
+       uint64_t i;
+
+       assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+       routing = kvm_gsi_routing_create();
+       for (i = intid; i < (uint64_t)intid + num; i++)
+               kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+       if (!expect_failure) {
+               kvm_gsi_routing_write(vm, routing);
+       } else {
+               ret = _kvm_gsi_routing_write(vm, routing);
+               /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+               if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+                       TEST_ASSERT(ret != 0 && errno == EINVAL,
+                               "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+                               "error: rc: %i errno: %i", intid, ret, errno);
+               else
+                       TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+                               "for intid %i failed, rc: %i errno: %i",
+                               intid, ret, errno);
+       }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+                                       struct kvm_vcpu *vcpu,
+                                       bool expect_failure)
+{
+       /*
+        * Ignore this when expecting failure, as invalid intids will lead to
+        * either trying to inject SGIs when the test is configured to be
+        * level_sensitive (or the reverse), or trying to inject large intids,
+        * which would write beyond the ISPENDR register space (and we don't
+        * want to do that either).
+        */
+       if (!expect_failure)
+               kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+               uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+               bool expect_failure)
+{
+       int fd[MAX_SPI];
+       uint64_t val;
+       int ret, f;
+       uint64_t i;
+
+       /*
+        * There is no way to try injecting an SGI or PPI as the interface
+        * starts counting from the first SPI (above the private ones), so just
+        * return.
+        */
+       if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+               return;
+
+       kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+                       kvm_max_routes, expect_failure);
+
+       /*
+        * If expect_failure, inject anyway; the injections will silently
+        * fail.  In any case, the guest will check that no actual interrupt
+        * was injected for those cases.
+        */
+
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               fd[f] = eventfd(0, 0);
+               TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
+       }
+
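+       /* Bind each eventfd to the GSI that corresponds to its intid. */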
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               struct kvm_irqfd irqfd = {
+                       .fd  = fd[f],
+                       .gsi = i - MIN_SPI,
+               };
+               assert(i <= (uint64_t)UINT_MAX);
+               vm_ioctl(vm, KVM_IRQFD, &irqfd);
+       }
+
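+       /* Signal each eventfd; KVM then injects the bound GSI via irqfd. */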
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+               val = 1;
+               ret = write(fd[f], &val, sizeof(uint64_t));
+               TEST_ASSERT(ret == sizeof(uint64_t),
+                           __KVM_SYSCALL_ERROR("write()", ret));
+       }
+
+       for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+               close(fd[f]);
+}
+
+/*
+ * Iterate over [first, first + num); the 64-bit 'tmp' copy of the counter
+ * keeps the loop bound from wrapping, which handles the valid case
+ * intid=0xffffffff num=1.
+ */
+#define for_each_intid(first, num, tmp, i)                                     \
+       for ((tmp) = (i) = (first);                                             \
+               (tmp) < (uint64_t)(first) + (uint64_t)(num);                    \
+               (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+                         struct kvm_inject_args *inject_args,
+                         struct test_args *test_args)
+{
+       kvm_inject_cmd cmd = inject_args->cmd;
+       uint32_t intid = inject_args->first_intid;
+       uint32_t num = inject_args->num;
+       int level = inject_args->level;
+       bool expect_failure = inject_args->expect_failure;
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t tmp;
+       uint32_t i;
+
+       /* Guard against intid + num overflow, but allow the valid case intid=0xffffffff num=1. */
+       assert(intid < UINT_MAX - num || num == 1);
+
+       switch (cmd) {
+       case KVM_INJECT_EDGE_IRQ_LINE:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 1, test_args,
+                                       expect_failure);
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 0, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_IRQ_LINE:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, level, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_IRQ_LINE_HIGH:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_line_check(vm, i, 1, test_args,
+                                       expect_failure);
+               break;
+       case KVM_SET_LEVEL_INFO_HIGH:
+               for_each_intid(intid, num, tmp, i)
+                       kvm_irq_set_level_info_check(gic_fd, i, 1,
+                                       expect_failure);
+               break;
+       case KVM_INJECT_IRQFD:
+               kvm_routing_and_irqfd_check(vm, intid, num,
+                                       test_args->kvm_max_routes,
+                                       expect_failure);
+               break;
+       case KVM_WRITE_ISPENDR:
+               for (i = intid; i < intid + num; i++)
+                       kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+                                                   expect_failure);
+               break;
+       case KVM_WRITE_ISACTIVER:
+               for (i = intid; i < intid + num; i++)
+                       kvm_irq_write_isactiver(gic_fd, i, vcpu);
+               break;
+       default:
+               break;
+       }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+               struct kvm_inject_args *args)
+{
+       struct kvm_inject_args *kvm_args_hva;
+       vm_vaddr_t kvm_args_gva;
+
+       kvm_args_gva = uc->args[1];
+       kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+       memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+       printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+                       args->nr_irqs, args->level_sensitive,
+                       args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+       struct ucall uc;
+       int gic_fd;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_inject_args inject_args;
+       vm_vaddr_t args_gva;
+
+       struct test_args args = {
+               .nr_irqs = nr_irqs,
+               .level_sensitive = level_sensitive,
+               .eoi_split = eoi_split,
+               .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+               .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+       };
+
+       print_args(&args);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vcpu);
+
+       /* Set up the guest args page and pass its address to the guest. */
+       args_gva = vm_vaddr_alloc_page(vm);
+       memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+       vcpu_args_set(vcpu, 1, args_gva);
+
+       gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
+
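+       /* Install the guest IRQ handler matching the eoi_split/level_sensitive combo. */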
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+               guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       kvm_inject_get_call(vm, &uc, &inject_args);
+                       run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       close(gic_fd);
+       kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+       printf(
+       "\n"
+       "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+       printf(" -n: specify number of IRQs to set up the vgic with. "
+               "It has to be a multiple of 32 and between 64 and 1024.\n");
+       printf(" -e: if 1, EOI is split: writing EOI drops the priority and a "
+               "separate write to DIR deactivates the interrupt.\n");
+       printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+       puts("");
+       exit(1);
+}
+
+int main(int argc, char **argv)
+{
+       uint32_t nr_irqs = 64;
+       bool default_args = true;
+       bool level_sensitive = false;
+       int opt;
+       bool eoi_split = false;
+
+       while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+               switch (opt) {
+               case 'n':
+                       nr_irqs = atoi_non_negative("Number of IRQs", optarg);
+                       if (nr_irqs > 1024 || nr_irqs % 32)
+                               help(argv[0]);
+                       break;
+               case 'e':
+                       eoi_split = (bool)atoi_paranoid(optarg);
+                       default_args = false;
+                       break;
+               case 'l':
+                       level_sensitive = (bool)atoi_paranoid(optarg);
+                       default_args = false;
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       /*
+        * If the user only specified nr_irqs (i.e. neither -e nor -l), run
+        * all eoi_split/level_sensitive combinations.
+        */
+       if (default_args) {
+               test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+               test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+               test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+               test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+       } else {
+               test_vgic(nr_irqs, level_sensitive, eoi_split);
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
new file mode 100644 (file)
index 0000000..fc4fe52
--- /dev/null
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX     1
+
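+/* LPIs occupy the INTID space starting at 8192, per the GICv3 architecture. */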
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int gic_fd, its_fd;
+
+static struct test_data {
+       bool            request_vcpus_stop;
+       u32             nr_cpus;
+       u32             nr_devices;
+       u32             nr_event_ids;
+
+       vm_paddr_t      device_table;
+       vm_paddr_t      collection_table;
+       vm_paddr_t      cmdq_base;
+       void            *cmdq_base_va;
+       vm_paddr_t      itt_tables;
+
+       vm_paddr_t      lpi_prop_table;
+       vm_paddr_t      lpi_pend_tables;
+} test_data =  {
+       .nr_cpus        = 1,
+       .nr_devices     = 1,
+       .nr_event_ids   = 16,
+};
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       u32 intid = gic_get_and_ack_irq();
+
+       if (intid == IAR_SPURIOUS)
+               return;
+
+       GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+       gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+       u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+       u32 nr_events = test_data.nr_event_ids;
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_cpus = test_data.nr_cpus;
+
+       for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+               its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+       /* Round-robin the LPIs to all of the vCPUs in the VM */
+       coll_id = 0;
+       for (device_id = 0; device_id < nr_devices; device_id++) {
+               vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+               its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+                                 itt_base, SZ_64K, true);
+
+               for (event_id = 0; event_id < nr_events; event_id++) {
+                       its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+                                          event_id, coll_id, intid++);
+
+                       coll_id = (coll_id + 1) % test_data.nr_cpus;
+               }
+       }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+       int i;
+
+       for (i = 0; i < test_data.nr_cpus; i++)
+               its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+       static atomic_int nr_cpus_ready = 0;
+       u32 cpuid = guest_get_vcpuid();
+
+       gic_init(GIC_V3, test_data.nr_cpus);
+       gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+                             test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+       atomic_fetch_add(&nr_cpus_ready, 1);
+
+       if (cpuid > 0)
+               return;
+
+       while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+               cpu_relax();
+
+       its_init(test_data.collection_table, SZ_64K,
+                test_data.device_table, SZ_64K,
+                test_data.cmdq_base, SZ_64K);
+
+       guest_setup_its_mappings();
+       guest_invalidate_all_rdists();
+}
+
+static void guest_code(size_t nr_lpis)
+{
+       guest_setup_gic();
+
+       GUEST_SYNC(0);
+
+       /*
+        * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+        * never getting the stop signal.
+        */
+       while (!READ_ONCE(test_data.request_vcpus_stop))
+               cpu_relax();
+
+       GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+       size_t pages;
+       size_t sz;
+
+       /*
+        * For the ITS:
+        *  - A single level device table
+        *  - A single level collection table
+        *  - The command queue
+        *  - An ITT for each device
+        */
+       sz = (3 + test_data.nr_devices) * SZ_64K;
+
+       /*
+        * For the redistributors:
+        *  - A shared LPI configuration table
+        *  - An LPI pending table for each vCPU
+        */
+       sz += (1 + test_data.nr_cpus) * SZ_64K;
+
+       pages = sz / vm->page_size;
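+       /* Place the backing memslot at the very top of the guest physical address space. */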
+       gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+                                   TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO  0xa0
+
+static void configure_lpis(void)
+{
+       size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+       u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+       size_t i;
+
+       for (i = 0; i < nr_lpis; i++) {
+               tbl[i] = LPI_PROP_DEFAULT_PRIO |
+                        LPI_PROP_GROUP1 |
+                        LPI_PROP_ENABLED;
+       }
+}
+
+static void setup_test_data(void)
+{
+       size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_cpus = test_data.nr_cpus;
+       vm_paddr_t cmdq_base;
+
+       test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                   gpa_base,
+                                                   TEST_MEMSLOT_INDEX);
+
+       test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                       gpa_base,
+                                                       TEST_MEMSLOT_INDEX);
+
+       cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+                                      TEST_MEMSLOT_INDEX);
+       virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+       test_data.cmdq_base = cmdq_base;
+       test_data.cmdq_base_va = (void *)cmdq_base;
+
+       test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+                                                 gpa_base, TEST_MEMSLOT_INDEX);
+
+       test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+                                                     gpa_base, TEST_MEMSLOT_INDEX);
+       configure_lpis();
+
+       test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+                                                      gpa_base, TEST_MEMSLOT_INDEX);
+
+       sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+       gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
+       __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
+
+       its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
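+       /* The ITS doorbell is the GITS_TRANSLATER register within the ITS frame. */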
+       vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+       struct kvm_msi msi = {
+               .address_lo     = db_addr,
+               .address_hi     = db_addr >> 32,
+               .data           = event_id,
+               .devid          = device_id,
+               .flags          = KVM_MSI_VALID_DEVID,
+       };
+
+       /*
+        * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+        * which for arm64 implies having a valid translation in the ITS.
+        */
+       TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+                   "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+       u32 device_id = (size_t)data;
+       u32 event_id;
+       size_t i;
+
+       pthread_barrier_wait(&test_setup_barrier);
+
+       for (i = 0; i < nr_iterations; i++)
+               for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+                       signal_lpi(device_id, event_id);
+
+       return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+       struct kvm_vcpu *vcpu = data;
+       struct ucall uc;
+
+       while (true) {
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       pthread_barrier_wait(&test_setup_barrier);
+                       continue;
+               case UCALL_DONE:
+                       return NULL;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+               }
+       }
+
+       return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+       double nr_lpis;
+       double time;
+
+       nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+       time = delta.tv_sec;
+       time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+       pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+       u32 nr_devices = test_data.nr_devices;
+       u32 nr_vcpus = test_data.nr_cpus;
+       pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+       pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+       struct timespec start, delta;
+       size_t i;
+
+       TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+       pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+       for (i = 0; i < nr_devices; i++)
+               pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+       pthread_barrier_wait(&test_setup_barrier);
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (i = 0; i < nr_devices; i++)
+               pthread_join(lpi_threads[i], NULL);
+
+       delta = timespec_elapsed(start);
+       write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_join(vcpu_threads[i], NULL);
+
+       report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+       int i;
+
+       vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+       TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+       vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+       vm_init_descriptor_tables(vm);
+       for (i = 0; i < test_data.nr_cpus; i++)
+               vcpu_init_descriptor_tables(vcpus[i]);
+
+       vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+       setup_memslot();
+
+       setup_gic();
+
+       setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+       close(its_fd);
+       close(gic_fd);
+       kvm_vm_free(vm);
+       free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+       pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
+       pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+       pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+       pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+       pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+       u32 nr_threads;
+       int c;
+
+       while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+               switch (c) {
+               case 'v':
+                       test_data.nr_cpus = atoi(optarg);
+                       break;
+               case 'd':
+                       test_data.nr_devices = atoi(optarg);
+                       break;
+               case 'e':
+                       test_data.nr_event_ids = atoi(optarg);
+                       break;
+               case 'i':
+                       nr_iterations = strtoul(optarg, NULL, 0);
+                       break;
+               case 'h':
+               default:
+                       pr_usage(argv[0]);
+                       return 1;
+               }
+       }
+
+       nr_threads = test_data.nr_cpus + test_data.nr_devices;
+       if (nr_threads > get_nprocs())
+               pr_info("WARNING: running %u threads on %d CPUs; performance may be degraded.\n",
+                        nr_threads, get_nprocs());
+
+       setup_vm();
+
+       run_test();
+
+       destroy_vm();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
new file mode 100644 (file)
index 0000000..f16b3b2
--- /dev/null
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if userspace accesses to the PMU registers honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX            31
+
+struct vpmu_vm {
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+       uint64_t set_reg_id;
+       uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+       return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+       u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
+}
+
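+/*
+ * Bitmask covering event counters [n-1:0] plus the cycle counter (bit 31);
+ * e.g. n = 6 yields 0x8000003f.
+ */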
+static uint64_t get_counters_mask(uint64_t n)
+{
+       uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+       if (n)
+               mask |= GENMASK(n - 1, 0);
+       return mask;
+}
+
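+/*
+ * Indirect accessors: PMSELR_EL0.SEL selects which event counter the
+ * PMXEV{CNTR,TYPER}_EL0 registers operate on; the ISB ensures the new
+ * selection takes effect before the indirect register is accessed.
+ */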
+/* Read PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       write_sysreg(val, pmxevcntr_el0);
+       isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+       write_sysreg(sel, pmselr_el0);
+       isb();
+       write_sysreg(val, pmxevtyper_el0);
+       isb();
+}
+
+static void pmu_disable_reset(void)
+{
+       uint64_t pmcr = read_sysreg(pmcr_el0);
+
+       /* Reset all counters, disabling them */
+       pmcr &= ~ARMV8_PMU_PMCR_E;
+       write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+       isb();
+}
+
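+/*
+ * Direct accessors: PMEVN_SWITCH() expands to a switch statement on 'n' so
+ * that the matching PMEV{CNTR,TYPER}<n>_EL0 register can be named directly
+ * in the mrs/msr instruction.
+ */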
+#define RETURN_READ_PMEVCNTRN(n) \
+       return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+       PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+       return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+       write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+       PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+       isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+       return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+       PMEVN_SWITCH(n, READ_PMEVTYPERN);
+       return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+       write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+       PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+       isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each accessor either reads/writes
+ * PMEV{CNTR,TYPER}<n>_EL0 directly (i.e. {read,write}_pmev{cnt,type}rn()),
+ * or does so indirectly through PMXEV{CNTR,TYPER}_EL0
+ * (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+       /* A function to be used to read PMEVCNTR<n>_EL0 */
+       unsigned long   (*read_cntr)(int idx);
+       /* A function to be used to write PMEVCNTR<n>_EL0 */
+       void            (*write_cntr)(int idx, unsigned long val);
+       /* A function to be used to read PMEVTYPER<n>_EL0 */
+       unsigned long   (*read_typer)(int idx);
+       /* A function to be used to write PMEVTYPER<n>_EL0 */
+       void            (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+       /* test with all direct accesses */
+       { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+       /* test with all indirect accesses */
+       { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+       /* read with direct accesses, and write with indirect accesses */
+       { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+       /* read with indirect accesses, and write with direct accesses */
+       { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc)    (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected)                    \
+{                                                                               \
+       uint64_t _tval = read_sysreg(regname);                                   \
+                                                                                \
+       if (set_expected)                                                        \
+               __GUEST_ASSERT((_tval & mask),                                   \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
+                               _tval, mask, set_expected);                      \
+       else                                                                     \
+               __GUEST_ASSERT(!(_tval & mask),                                  \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
+                               _tval, mask, set_expected);                      \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+       GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+       GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+       uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+       bool set_expected = false;
+
+       if (set_op) {
+               write_sysreg(test_bit, pmcntenset_el0);
+               write_sysreg(test_bit, pmintenset_el1);
+               write_sysreg(test_bit, pmovsset_el0);
+
+               /* The bit will be set only if the counter is implemented */
+               pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+               set_expected = (pmc_idx < pmcr_n) ? true : false;
+       } else {
+               write_sysreg(test_bit, pmcntenclr_el0);
+               write_sysreg(test_bit, pmintenclr_el1);
+               write_sysreg(test_bit, pmovsclr_el0);
+       }
+       check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+       uint64_t write_data, read_data;
+
+       /* Disable all PMCs and reset all PMCs to zero. */
+       pmu_disable_reset();
+
+       /*
+        * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+        */
+
+       /* Make sure that the bit in those registers is initially cleared */
+       test_bitmap_pmu_regs(pmc_idx, false);
+       /* Test if setting the bit in those registers works */
+       test_bitmap_pmu_regs(pmc_idx, true);
+       /* Test if clearing the bit in those registers works */
+       test_bitmap_pmu_regs(pmc_idx, false);
+
+       /*
+        * Tests for reading/writing the event type register.
+        */
+
+       /*
+        * Set the event type register to an arbitrary value just to test
+        * reading/writing the register.
+        * The Arm ARM says that for events 0x0000 to 0x003F, the value read
+        * back from the PMEVTYPER<n>_EL0.evtCount field is the value that was
+        * written, even when the specified event is not supported.
+        */
+       write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+       acc->write_typer(pmc_idx, write_data);
+       read_data = acc->read_typer(pmc_idx);
+       __GUEST_ASSERT(read_data == write_data,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+       /*
+        * Tests for reading/writing the event count register.
+        */
+
+       read_data = acc->read_cntr(pmc_idx);
+
+       /* The count value must be 0, as it is disabled and reset */
+       __GUEST_ASSERT(read_data == 0,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+       write_data = read_data + pmc_idx + 0x12345;
+       acc->write_cntr(pmc_idx, write_data);
+       read_data = acc->read_cntr(pmc_idx);
+       __GUEST_ASSERT(read_data == write_data,
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC     (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+       uint64_t esr, ec;
+
+       esr = read_sysreg(esr_el1);
+       ec = ESR_ELx_EC(esr);
+
+       __GUEST_ASSERT(expected_ec == ec,
+                       "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+                       regs->pc, esr, ec, expected_ec);
+
+       /* skip the trapping instruction */
+       regs->pc += 4;
+
+       /* Use INVALID_EC to indicate an exception occurred */
+       expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset expected_ec to INVALID_EC and skip the instruction that
+ * trapped.
+ */
+#define TEST_EXCEPTION(ec, ops)                                \
+({                                                     \
+       GUEST_ASSERT(ec != INVALID_EC);                 \
+       WRITE_ONCE(expected_ec, ec);                    \
+       dsb(ish);                                       \
+       ops;                                            \
+       GUEST_ASSERT(expected_ec == INVALID_EC);        \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+       /*
+        * Reading/writing the event count/type registers should cause
+        * an UNDEFINED exception.
+        */
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx));
+       TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+       /*
+        * The bit corresponding to the (unimplemented) counter in
+        * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+        */
+       test_bitmap_pmu_regs(pmc_idx, 1);
+       test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+       uint64_t pmcr, pmcr_n, unimp_mask;
+       int i, pmc;
+
+       __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
+                       expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+       pmcr = read_sysreg(pmcr_el0);
+       pmcr_n = get_pmcr_n(pmcr);
+
+       /* Make sure that PMCR_EL0.N indicates the value userspace set */
+       __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+                       "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+                       expected_pmcr_n, pmcr_n);
+
+       /*
+        * Make sure that (RAZ) bits corresponding to unimplemented event
+        * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+        * to zero.
+        * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+        */
+       unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+       check_bitmap_pmu_regs(unimp_mask, false);
+
+       /*
+        * Tests for reading/writing PMU registers for implemented counters.
+        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+        */
+       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+               for (pmc = 0; pmc < pmcr_n; pmc++)
+                       test_access_pmc_regs(&pmc_accessors[i], pmc);
+       }
+
+       /*
+        * Tests for reading/writing PMU registers for unimplemented counters.
+        * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+        */
+       for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+               for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+                       test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+       }
+
+       GUEST_DONE();
+}
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+       struct kvm_vcpu_init init;
+       uint8_t pmuver, ec;
+       uint64_t dfr0, irq = 23;
+       struct kvm_device_attr irq_attr = {
+               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+               .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+               .addr = (uint64_t)&irq,
+       };
+       struct kvm_device_attr init_attr = {
+               .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+               .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+       };
+
+       /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+       memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+       vpmu_vm.vm = vm_create(1);
+       vm_init_descriptor_tables(vpmu_vm.vm);
+       for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) {
+               vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+                                       guest_sync_handler);
+       }
+
+       /* Create vCPU with PMUv3 */
+       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+       vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+       vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+       vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
+       __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+                      "Failed to create vgic-v3, skipping");
+
+       /* Make sure that PMUv3 support is indicated in the ID register */
+       dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
+       pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+       TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+                   pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+                   "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+       /* Initialize vPMU */
+       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+       vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+       close(vpmu_vm.gic_fd);
+       kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+       struct ucall uc;
+
+       vcpu_args_set(vcpu, 1, pmcr_n);
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               break;
+       }
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t pmcr, pmcr_orig;
+
+       create_vpmu_vm(guest_code);
+       vcpu = vpmu_vm.vcpu;
+
+       pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+       pmcr = pmcr_orig;
+
+       /*
+        * Setting a larger value of PMCR.N should not modify the field, and
+        * should still return success.
+        */
+       set_pmcr_n(&pmcr, pmcr_n);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+       pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+
+       if (expect_fail)
+               TEST_ASSERT(pmcr_orig == pmcr,
+                           "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
+                           pmcr, pmcr_n);
+       else
+               TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+                           "Failed to update PMCR.N to %lu (received: %lu)",
+                           pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+       uint64_t sp;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vcpu_init init;
+
+       pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+       vcpu = vpmu_vm.vcpu;
+
+       /* Save the initial sp so it can be restored before re-running the guest */
+       sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+
+       run_vcpu(vcpu, pmcr_n);
+
+       /*
+        * Reset and re-initialize the vCPU, and run the guest code again to
+        * check if PMCR_EL0.N is preserved.
+        */
+       vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+       init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+       aarch64_vcpu_setup(vcpu, &init);
+       vcpu_init_descriptor_tables(vcpu);
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+       run_vcpu(vcpu, pmcr_n);
+
+       destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+       PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+       PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+       PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+       uint64_t set_reg_id, clr_reg_id, reg_val;
+       uint64_t valid_counters_mask, max_counters_mask;
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+       vcpu = vpmu_vm.vcpu;
+
+       valid_counters_mask = get_counters_mask(pmcr_n);
+       max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+       for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+               set_reg_id = validity_check_reg_sets[i].set_reg_id;
+               clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+               /*
+                * Test if the 'set' and 'clr' variants of the registers
+                * are initialized based on the number of valid counters.
+                */
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+               /*
+                * Using the 'set' variant, force-set the register to the
+                * max number of possible counters and test if KVM discards
+                * the bits for unimplemented counters as it should.
+                */
+               vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+               reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
+               TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+                           "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+                           KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+       }
+
+       destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+       pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+       test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+       destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+       uint64_t pmcr;
+
+       create_vpmu_vm(guest_code);
+       pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+       destroy_vpmu_vm();
+       return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+       uint64_t i, pmcr_n;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+       pmcr_n = get_pmcr_n_limit();
+       for (i = 0; i <= pmcr_n; i++) {
+               run_access_test(i);
+               run_pmregs_validity_test(i);
+       }
+
+       for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+               run_error_test(i);
+
+       return 0;
+}
index 9f24303acb8cb2f9ff53d3fd2699ffb47e87a83e..e79817bd0e29e31fd530480cfba9acbae023fb1c 100644 (file)
@@ -21,7 +21,7 @@
 #include "ucall_common.h"
 
 #ifdef __aarch64__
-#include "aarch64/vgic.h"
+#include "arm64/vgic.h"
 
 static int gic_fd;
 
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
deleted file mode 100644 (file)
index bf461de..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Timer specific interface
- */
-
-#ifndef SELFTEST_KVM_ARCH_TIMER_H
-#define SELFTEST_KVM_ARCH_TIMER_H
-
-#include "processor.h"
-
-enum arch_timer {
-       VIRTUAL,
-       PHYSICAL,
-};
-
-#define CTL_ENABLE     (1 << 0)
-#define CTL_IMASK      (1 << 1)
-#define CTL_ISTATUS    (1 << 2)
-
-#define msec_to_cycles(msec)   \
-       (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
-
-#define usec_to_cycles(usec)   \
-       (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
-
-#define cycles_to_usec(cycles) \
-       ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
-
-static inline uint32_t timer_get_cntfrq(void)
-{
-       return read_sysreg(cntfrq_el0);
-}
-
-static inline uint64_t timer_get_cntct(enum arch_timer timer)
-{
-       isb();
-
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntvct_el0);
-       case PHYSICAL:
-               return read_sysreg(cntpct_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(cval, cntv_cval_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(cval, cntp_cval_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline uint64_t timer_get_cval(enum arch_timer timer)
-{
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_cval_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_cval_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(tval, cntv_tval_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(tval, cntp_tval_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline int32_t timer_get_tval(enum arch_timer timer)
-{
-       isb();
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_tval_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_tval_el0);
-       default:
-               GUEST_FAIL("Could not get timer %d\n", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
-{
-       switch (timer) {
-       case VIRTUAL:
-               write_sysreg(ctl, cntv_ctl_el0);
-               break;
-       case PHYSICAL:
-               write_sysreg(ctl, cntp_ctl_el0);
-               break;
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       isb();
-}
-
-static inline uint32_t timer_get_ctl(enum arch_timer timer)
-{
-       switch (timer) {
-       case VIRTUAL:
-               return read_sysreg(cntv_ctl_el0);
-       case PHYSICAL:
-               return read_sysreg(cntp_ctl_el0);
-       default:
-               GUEST_FAIL("Unexpected timer type = %u", timer);
-       }
-
-       /* We should not reach here */
-       return 0;
-}
-
-static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
-{
-       uint64_t now_ct = timer_get_cntct(timer);
-       uint64_t next_ct = now_ct + msec_to_cycles(msec);
-
-       timer_set_cval(timer, next_ct);
-}
-
-static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
-{
-       timer_set_tval(timer, msec_to_cycles(msec));
-}
-
-#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h
deleted file mode 100644 (file)
index 329e4f5..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM simple delay routines
- */
-
-#ifndef SELFTEST_KVM_ARM_DELAY_H
-#define SELFTEST_KVM_ARM_DELAY_H
-
-#include "arch_timer.h"
-
-static inline void __delay(uint64_t cycles)
-{
-       enum arch_timer timer = VIRTUAL;
-       uint64_t start = timer_get_cntct(timer);
-
-       while ((timer_get_cntct(timer) - start) < cycles)
-               cpu_relax();
-}
-
-static inline void udelay(unsigned long usec)
-{
-       __delay(usec_to_cycles(usec));
-}
-
-#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
deleted file mode 100644 (file)
index baeb3c8..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) specific defines
- */
-
-#ifndef SELFTEST_KVM_GIC_H
-#define SELFTEST_KVM_GIC_H
-
-#include <asm/kvm.h>
-
-enum gic_type {
-       GIC_V3,
-       GIC_TYPE_MAX,
-};
-
-/*
- * Note that the redistributor frames are at the end, as the range scales
- * with the number of vCPUs in the VM.
- */
-#define GITS_BASE_GPA          0x8000000ULL
-#define GICD_BASE_GPA          (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
-#define GICR_BASE_GPA          (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
-
-/* The GIC is identity-mapped into the guest at the time of setup. */
-#define GITS_BASE_GVA          ((volatile void *)GITS_BASE_GPA)
-#define GICD_BASE_GVA          ((volatile void *)GICD_BASE_GPA)
-#define GICR_BASE_GVA          ((volatile void *)GICR_BASE_GPA)
-
-#define MIN_SGI                        0
-#define MIN_PPI                        16
-#define MIN_SPI                        32
-#define MAX_SPI                        1019
-#define IAR_SPURIOUS           1023
-
-#define INTID_IS_SGI(intid)    (0       <= (intid) && (intid) < MIN_PPI)
-#define INTID_IS_PPI(intid)    (MIN_PPI <= (intid) && (intid) < MIN_SPI)
-#define INTID_IS_SPI(intid)    (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
-
-void gic_init(enum gic_type type, unsigned int nr_cpus);
-void gic_irq_enable(unsigned int intid);
-void gic_irq_disable(unsigned int intid);
-unsigned int gic_get_and_ack_irq(void);
-void gic_set_eoi(unsigned int intid);
-void gic_set_dir(unsigned int intid);
-
-/*
- * Sets the EOI mode. When split is false, EOI just drops the priority. When
- * split is true, EOI drops the priority and deactivates the interrupt.
- */
-void gic_set_eoi_split(bool split);
-void gic_set_priority_mask(uint64_t mask);
-void gic_set_priority(uint32_t intid, uint32_t prio);
-void gic_irq_set_active(unsigned int intid);
-void gic_irq_clear_active(unsigned int intid);
-bool gic_irq_get_active(unsigned int intid);
-void gic_irq_set_pending(unsigned int intid);
-void gic_irq_clear_pending(unsigned int intid);
-bool gic_irq_get_pending(unsigned int intid);
-void gic_irq_set_config(unsigned int intid, bool is_edge);
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
-                          vm_paddr_t pend_table);
-
-#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
deleted file mode 100644
index a76615f..0000000
+++ /dev/null
@@ -1,604 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-#ifndef __SELFTESTS_GIC_V3_H
-#define __SELFTESTS_GIC_V3_H
-
-/*
- * Distributor registers. We assume we're running non-secure, with ARE
- * being set. Secure-only and non-ARE registers are not described.
- */
-#define GICD_CTLR                      0x0000
-#define GICD_TYPER                     0x0004
-#define GICD_IIDR                      0x0008
-#define GICD_TYPER2                    0x000C
-#define GICD_STATUSR                   0x0010
-#define GICD_SETSPI_NSR                        0x0040
-#define GICD_CLRSPI_NSR                        0x0048
-#define GICD_SETSPI_SR                 0x0050
-#define GICD_CLRSPI_SR                 0x0058
-#define GICD_IGROUPR                   0x0080
-#define GICD_ISENABLER                 0x0100
-#define GICD_ICENABLER                 0x0180
-#define GICD_ISPENDR                   0x0200
-#define GICD_ICPENDR                   0x0280
-#define GICD_ISACTIVER                 0x0300
-#define GICD_ICACTIVER                 0x0380
-#define GICD_IPRIORITYR                        0x0400
-#define GICD_ICFGR                     0x0C00
-#define GICD_IGRPMODR                  0x0D00
-#define GICD_NSACR                     0x0E00
-#define GICD_IGROUPRnE                 0x1000
-#define GICD_ISENABLERnE               0x1200
-#define GICD_ICENABLERnE               0x1400
-#define GICD_ISPENDRnE                 0x1600
-#define GICD_ICPENDRnE                 0x1800
-#define GICD_ISACTIVERnE               0x1A00
-#define GICD_ICACTIVERnE               0x1C00
-#define GICD_IPRIORITYRnE              0x2000
-#define GICD_ICFGRnE                   0x3000
-#define GICD_IROUTER                   0x6000
-#define GICD_IROUTERnE                 0x8000
-#define GICD_IDREGS                    0xFFD0
-#define GICD_PIDR2                     0xFFE8
-
-#define ESPI_BASE_INTID                        4096
-
-/*
- * Those registers are actually from GICv2, but the spec demands that they
- * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
- */
-#define GICD_ITARGETSR                 0x0800
-#define GICD_SGIR                      0x0F00
-#define GICD_CPENDSGIR                 0x0F10
-#define GICD_SPENDSGIR                 0x0F20
-
-#define GICD_CTLR_RWP                  (1U << 31)
-#define GICD_CTLR_nASSGIreq            (1U << 8)
-#define GICD_CTLR_DS                   (1U << 6)
-#define GICD_CTLR_ARE_NS               (1U << 4)
-#define GICD_CTLR_ENABLE_G1A           (1U << 1)
-#define GICD_CTLR_ENABLE_G1            (1U << 0)
-
-#define GICD_IIDR_IMPLEMENTER_SHIFT    0
-#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
-#define GICD_IIDR_REVISION_SHIFT       12
-#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
-#define GICD_IIDR_VARIANT_SHIFT                16
-#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
-#define GICD_IIDR_PRODUCT_ID_SHIFT     24
-#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
-
-
-/*
- * In systems with a single security state (what we emulate in KVM)
- * the meaning of the interrupt group enable bits is slightly different
- */
-#define GICD_CTLR_ENABLE_SS_G1         (1U << 1)
-#define GICD_CTLR_ENABLE_SS_G0         (1U << 0)
-
-#define GICD_TYPER_RSS                 (1U << 26)
-#define GICD_TYPER_LPIS                        (1U << 17)
-#define GICD_TYPER_MBIS                        (1U << 16)
-#define GICD_TYPER_ESPI                        (1U << 8)
-
-#define GICD_TYPER_ID_BITS(typer)      ((((typer) >> 19) & 0x1f) + 1)
-#define GICD_TYPER_NUM_LPIS(typer)     ((((typer) >> 11) & 0x1f) + 1)
-#define GICD_TYPER_SPIS(typer)         ((((typer) & 0x1f) + 1) * 32)
-#define GICD_TYPER_ESPIS(typer)                                                \
-       (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
-
-#define GICD_TYPER2_nASSGIcap          (1U << 8)
-#define GICD_TYPER2_VIL                        (1U << 7)
-#define GICD_TYPER2_VID                        GENMASK(4, 0)
-
-#define GICD_IROUTER_SPI_MODE_ONE      (0U << 31)
-#define GICD_IROUTER_SPI_MODE_ANY      (1U << 31)
-
-#define GIC_PIDR2_ARCH_MASK            0xf0
-#define GIC_PIDR2_ARCH_GICv3           0x30
-#define GIC_PIDR2_ARCH_GICv4           0x40
-
-#define GIC_V3_DIST_SIZE               0x10000
-
-#define GIC_PAGE_SIZE_4K               0ULL
-#define GIC_PAGE_SIZE_16K              1ULL
-#define GIC_PAGE_SIZE_64K              2ULL
-#define GIC_PAGE_SIZE_MASK             3ULL
-
-/*
- * Re-Distributor registers, offsets from RD_base
- */
-#define GICR_CTLR                      GICD_CTLR
-#define GICR_IIDR                      0x0004
-#define GICR_TYPER                     0x0008
-#define GICR_STATUSR                   GICD_STATUSR
-#define GICR_WAKER                     0x0014
-#define GICR_SETLPIR                   0x0040
-#define GICR_CLRLPIR                   0x0048
-#define GICR_PROPBASER                 0x0070
-#define GICR_PENDBASER                 0x0078
-#define GICR_INVLPIR                   0x00A0
-#define GICR_INVALLR                   0x00B0
-#define GICR_SYNCR                     0x00C0
-#define GICR_IDREGS                    GICD_IDREGS
-#define GICR_PIDR2                     GICD_PIDR2
-
-#define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
-#define GICR_CTLR_CES                  (1UL << 1)
-#define GICR_CTLR_IR                   (1UL << 2)
-#define GICR_CTLR_RWP                  (1UL << 3)
-
-#define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
-
-#define EPPI_BASE_INTID                        1056
-
-#define GICR_TYPER_NR_PPIS(r)                                          \
-       ({                                                              \
-               unsigned int __ppinum = ((r) >> 27) & 0x1f;             \
-               unsigned int __nr_ppis = 16;                            \
-               if (__ppinum == 1 || __ppinum == 2)                     \
-                       __nr_ppis +=  __ppinum * 32;                    \
-                                                                       \
-               __nr_ppis;                                              \
-        })
-
-#define GICR_WAKER_ProcessorSleep      (1U << 1)
-#define GICR_WAKER_ChildrenAsleep      (1U << 2)
-
-#define GIC_BASER_CACHE_nCnB           0ULL
-#define GIC_BASER_CACHE_SameAsInner    0ULL
-#define GIC_BASER_CACHE_nC             1ULL
-#define GIC_BASER_CACHE_RaWt           2ULL
-#define GIC_BASER_CACHE_RaWb           3ULL
-#define GIC_BASER_CACHE_WaWt           4ULL
-#define GIC_BASER_CACHE_WaWb           5ULL
-#define GIC_BASER_CACHE_RaWaWt         6ULL
-#define GIC_BASER_CACHE_RaWaWb         7ULL
-#define GIC_BASER_CACHE_MASK           7ULL
-#define GIC_BASER_NonShareable         0ULL
-#define GIC_BASER_InnerShareable       1ULL
-#define GIC_BASER_OuterShareable       2ULL
-#define GIC_BASER_SHAREABILITY_MASK    3ULL
-
-#define GIC_BASER_CACHEABILITY(reg, inner_outer, type)                 \
-       (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
-
-#define GIC_BASER_SHAREABILITY(reg, type)                              \
-       (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
-
-/* encode a size field of width @w containing @n - 1 units */
-#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
-
-#define GICR_PROPBASER_SHAREABILITY_SHIFT              (10)
-#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT                (7)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT                (56)
-#define GICR_PROPBASER_SHAREABILITY_MASK                               \
-       GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
-#define GICR_PROPBASER_INNER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
-#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
-#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PROPBASER_InnerShareable                                  \
-       GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
-
-#define GICR_PROPBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
-#define GICR_PROPBASER_nC      GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
-#define GICR_PROPBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
-#define GICR_PROPBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
-#define GICR_PROPBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
-#define GICR_PROPBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
-#define GICR_PROPBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
-#define GICR_PROPBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
-
-#define GICR_PROPBASER_IDBITS_MASK                     (0x1f)
-#define GICR_PROPBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 12))
-#define GICR_PENDBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 16))
-
-#define GICR_PENDBASER_SHAREABILITY_SHIFT              (10)
-#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT                (7)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT                (56)
-#define GICR_PENDBASER_SHAREABILITY_MASK                               \
-       GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
-#define GICR_PENDBASER_INNER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
-#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK                         \
-       GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
-#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_PENDBASER_InnerShareable                                  \
-       GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
-
-#define GICR_PENDBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
-#define GICR_PENDBASER_nC      GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
-#define GICR_PENDBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
-#define GICR_PENDBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
-#define GICR_PENDBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
-#define GICR_PENDBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
-#define GICR_PENDBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
-#define GICR_PENDBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
-
-#define GICR_PENDBASER_PTZ                             BIT_ULL(62)
-
-/*
- * Re-Distributor registers, offsets from SGI_base
- */
-#define GICR_IGROUPR0                  GICD_IGROUPR
-#define GICR_ISENABLER0                        GICD_ISENABLER
-#define GICR_ICENABLER0                        GICD_ICENABLER
-#define GICR_ISPENDR0                  GICD_ISPENDR
-#define GICR_ICPENDR0                  GICD_ICPENDR
-#define GICR_ISACTIVER0                        GICD_ISACTIVER
-#define GICR_ICACTIVER0                        GICD_ICACTIVER
-#define GICR_IPRIORITYR0               GICD_IPRIORITYR
-#define GICR_ICFGR0                    GICD_ICFGR
-#define GICR_IGRPMODR0                 GICD_IGRPMODR
-#define GICR_NSACR                     GICD_NSACR
-
-#define GICR_TYPER_PLPIS               (1U << 0)
-#define GICR_TYPER_VLPIS               (1U << 1)
-#define GICR_TYPER_DIRTY               (1U << 2)
-#define GICR_TYPER_DirectLPIS          (1U << 3)
-#define GICR_TYPER_LAST                        (1U << 4)
-#define GICR_TYPER_RVPEID              (1U << 7)
-#define GICR_TYPER_COMMON_LPI_AFF      GENMASK_ULL(25, 24)
-#define GICR_TYPER_AFFINITY            GENMASK_ULL(63, 32)
-
-#define GICR_INVLPIR_INTID             GENMASK_ULL(31, 0)
-#define GICR_INVLPIR_VPEID             GENMASK_ULL(47, 32)
-#define GICR_INVLPIR_V                 GENMASK_ULL(63, 63)
-
-#define GICR_INVALLR_VPEID             GICR_INVLPIR_VPEID
-#define GICR_INVALLR_V                 GICR_INVLPIR_V
-
-#define GIC_V3_REDIST_SIZE             0x20000
-
-#define LPI_PROP_GROUP1                        (1 << 1)
-#define LPI_PROP_ENABLED               (1 << 0)
-
-/*
- * Re-Distributor registers, offsets from VLPI_base
- */
-#define GICR_VPROPBASER                        0x0070
-
-#define GICR_VPROPBASER_IDBITS_MASK    0x1f
-
-#define GICR_VPROPBASER_SHAREABILITY_SHIFT             (10)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT       (7)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT       (56)
-
-#define GICR_VPROPBASER_SHAREABILITY_MASK                              \
-       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
-#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
-#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
-#define GICR_VPROPBASER_CACHEABILITY_MASK                              \
-       GICR_VPROPBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPROPBASER_InnerShareable                                 \
-       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
-
-#define GICR_VPROPBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
-#define GICR_VPROPBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
-#define GICR_VPROPBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
-#define GICR_VPROPBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
-#define GICR_VPROPBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
-#define GICR_VPROPBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
-#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
-#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
-
-/*
- * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
- * VPROPBASER and ITS_BASER. Just not quite any of the two.
- */
-#define GICR_VPROPBASER_4_1_VALID      (1ULL << 63)
-#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
-#define GICR_VPROPBASER_4_1_INDIRECT   (1ULL << 55)
-#define GICR_VPROPBASER_4_1_PAGE_SIZE  GENMASK_ULL(54, 53)
-#define GICR_VPROPBASER_4_1_Z          (1ULL << 52)
-#define GICR_VPROPBASER_4_1_ADDR       GENMASK_ULL(51, 12)
-#define GICR_VPROPBASER_4_1_SIZE       GENMASK_ULL(6, 0)
-
-#define GICR_VPENDBASER                        0x0078
-
-#define GICR_VPENDBASER_SHAREABILITY_SHIFT             (10)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT       (7)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT       (56)
-#define GICR_VPENDBASER_SHAREABILITY_MASK                              \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
-#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
-#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK                                \
-       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
-#define GICR_VPENDBASER_CACHEABILITY_MASK                              \
-       GICR_VPENDBASER_INNER_CACHEABILITY_MASK
-
-#define GICR_VPENDBASER_NonShareable                                   \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
-
-#define GICR_VPENDBASER_InnerShareable                                 \
-       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
-
-#define GICR_VPENDBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
-#define GICR_VPENDBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
-#define GICR_VPENDBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
-#define GICR_VPENDBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
-#define GICR_VPENDBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
-#define GICR_VPENDBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
-#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
-#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
-
-#define GICR_VPENDBASER_Dirty          (1ULL << 60)
-#define GICR_VPENDBASER_PendingLast    (1ULL << 61)
-#define GICR_VPENDBASER_IDAI           (1ULL << 62)
-#define GICR_VPENDBASER_Valid          (1ULL << 63)
-
-/*
- * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
- * also use the above Valid, PendingLast and Dirty.
- */
-#define GICR_VPENDBASER_4_1_DB         (1ULL << 62)
-#define GICR_VPENDBASER_4_1_VGRP0EN    (1ULL << 59)
-#define GICR_VPENDBASER_4_1_VGRP1EN    (1ULL << 58)
-#define GICR_VPENDBASER_4_1_VPEID      GENMASK_ULL(15, 0)
-
-#define GICR_VSGIR                     0x0080
-
-#define GICR_VSGIR_VPEID               GENMASK(15, 0)
-
-#define GICR_VSGIPENDR                 0x0088
-
-#define GICR_VSGIPENDR_BUSY            (1U << 31)
-#define GICR_VSGIPENDR_PENDING         GENMASK(15, 0)
-
-/*
- * ITS registers, offsets from ITS_base
- */
-#define GITS_CTLR                      0x0000
-#define GITS_IIDR                      0x0004
-#define GITS_TYPER                     0x0008
-#define GITS_MPIDR                     0x0018
-#define GITS_CBASER                    0x0080
-#define GITS_CWRITER                   0x0088
-#define GITS_CREADR                    0x0090
-#define GITS_BASER                     0x0100
-#define GITS_IDREGS_BASE               0xffd0
-#define GITS_PIDR0                     0xffe0
-#define GITS_PIDR1                     0xffe4
-#define GITS_PIDR2                     GICR_PIDR2
-#define GITS_PIDR4                     0xffd0
-#define GITS_CIDR0                     0xfff0
-#define GITS_CIDR1                     0xfff4
-#define GITS_CIDR2                     0xfff8
-#define GITS_CIDR3                     0xfffc
-
-#define GITS_TRANSLATER                        0x10040
-
-#define GITS_SGIR                      0x20020
-
-#define GITS_SGIR_VPEID                        GENMASK_ULL(47, 32)
-#define GITS_SGIR_VINTID               GENMASK_ULL(3, 0)
-
-#define GITS_CTLR_ENABLE               (1U << 0)
-#define GITS_CTLR_ImDe                 (1U << 1)
-#define        GITS_CTLR_ITS_NUMBER_SHIFT      4
-#define        GITS_CTLR_ITS_NUMBER            (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
-#define GITS_CTLR_QUIESCENT            (1U << 31)
-
-#define GITS_TYPER_PLPIS               (1UL << 0)
-#define GITS_TYPER_VLPIS               (1UL << 1)
-#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT        4
-#define GITS_TYPER_ITT_ENTRY_SIZE      GENMASK_ULL(7, 4)
-#define GITS_TYPER_IDBITS_SHIFT                8
-#define GITS_TYPER_DEVBITS_SHIFT       13
-#define GITS_TYPER_DEVBITS             GENMASK_ULL(17, 13)
-#define GITS_TYPER_PTA                 (1UL << 19)
-#define GITS_TYPER_HCC_SHIFT           24
-#define GITS_TYPER_HCC(r)              (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
-#define GITS_TYPER_VMOVP               (1ULL << 37)
-#define GITS_TYPER_VMAPP               (1ULL << 40)
-#define GITS_TYPER_SVPET               GENMASK_ULL(42, 41)
-
-#define GITS_IIDR_REV_SHIFT            12
-#define GITS_IIDR_REV_MASK             (0xf << GITS_IIDR_REV_SHIFT)
-#define GITS_IIDR_REV(r)               (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
-#define GITS_IIDR_PRODUCTID_SHIFT      24
-
-#define GITS_CBASER_VALID                      (1ULL << 63)
-#define GITS_CBASER_SHAREABILITY_SHIFT         (10)
-#define GITS_CBASER_INNER_CACHEABILITY_SHIFT   (59)
-#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT   (53)
-#define GITS_CBASER_SHAREABILITY_MASK                                  \
-       GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
-#define GITS_CBASER_INNER_CACHEABILITY_MASK                            \
-       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
-#define GITS_CBASER_OUTER_CACHEABILITY_MASK                            \
-       GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
-#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
-
-#define GITS_CBASER_InnerShareable                                     \
-       GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
-
-#define GITS_CBASER_nCnB       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
-#define GITS_CBASER_nC         GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
-#define GITS_CBASER_RaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
-#define GITS_CBASER_RaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
-#define GITS_CBASER_WaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
-#define GITS_CBASER_WaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
-#define GITS_CBASER_RaWaWt     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
-#define GITS_CBASER_RaWaWb     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
-
-#define GITS_CBASER_ADDRESS(cbaser)    ((cbaser) & GENMASK_ULL(51, 12))
-
-#define GITS_BASER_NR_REGS             8
-
-#define GITS_BASER_VALID                       (1ULL << 63)
-#define GITS_BASER_INDIRECT                    (1ULL << 62)
-
-#define GITS_BASER_INNER_CACHEABILITY_SHIFT    (59)
-#define GITS_BASER_OUTER_CACHEABILITY_SHIFT    (53)
-#define GITS_BASER_INNER_CACHEABILITY_MASK                             \
-       GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
-#define GITS_BASER_CACHEABILITY_MASK           GITS_BASER_INNER_CACHEABILITY_MASK
-#define GITS_BASER_OUTER_CACHEABILITY_MASK                             \
-       GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
-#define GITS_BASER_SHAREABILITY_MASK                                   \
-       GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
-
-#define GITS_BASER_nCnB                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
-#define GITS_BASER_nC          GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
-#define GITS_BASER_RaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
-#define GITS_BASER_RaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
-#define GITS_BASER_WaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
-#define GITS_BASER_WaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
-#define GITS_BASER_RaWaWt      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
-#define GITS_BASER_RaWaWb      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
-
-#define GITS_BASER_TYPE_SHIFT                  (56)
-#define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
-#define GITS_BASER_ENTRY_SIZE_SHIFT            (48)
-#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
-#define GITS_BASER_ENTRY_SIZE_MASK     GENMASK_ULL(52, 48)
-#define GITS_BASER_PHYS_52_to_48(phys)                                 \
-       (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
-#define GITS_BASER_ADDR_48_to_52(baser)                                        \
-       (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
-
-#define GITS_BASER_SHAREABILITY_SHIFT  (10)
-#define GITS_BASER_InnerShareable                                      \
-       GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
-#define GITS_BASER_PAGE_SIZE_SHIFT     (8)
-#define __GITS_BASER_PSZ(sz)           (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
-#define GITS_BASER_PAGE_SIZE_4K                __GITS_BASER_PSZ(4K)
-#define GITS_BASER_PAGE_SIZE_16K       __GITS_BASER_PSZ(16K)
-#define GITS_BASER_PAGE_SIZE_64K       __GITS_BASER_PSZ(64K)
-#define GITS_BASER_PAGE_SIZE_MASK      __GITS_BASER_PSZ(MASK)
-#define GITS_BASER_PAGES_MAX           256
-#define GITS_BASER_PAGES_SHIFT         (0)
-#define GITS_BASER_NR_PAGES(r)         (((r) & 0xff) + 1)
-
-#define GITS_BASER_TYPE_NONE           0
-#define GITS_BASER_TYPE_DEVICE         1
-#define GITS_BASER_TYPE_VCPU           2
-#define GITS_BASER_TYPE_RESERVED3      3
-#define GITS_BASER_TYPE_COLLECTION     4
-#define GITS_BASER_TYPE_RESERVED5      5
-#define GITS_BASER_TYPE_RESERVED6      6
-#define GITS_BASER_TYPE_RESERVED7      7
-
-#define GITS_LVL1_ENTRY_SIZE           (8UL)
-
-/*
- * ITS commands
- */
-#define GITS_CMD_MAPD                  0x08
-#define GITS_CMD_MAPC                  0x09
-#define GITS_CMD_MAPTI                 0x0a
-#define GITS_CMD_MAPI                  0x0b
-#define GITS_CMD_MOVI                  0x01
-#define GITS_CMD_DISCARD               0x0f
-#define GITS_CMD_INV                   0x0c
-#define GITS_CMD_MOVALL                        0x0e
-#define GITS_CMD_INVALL                        0x0d
-#define GITS_CMD_INT                   0x03
-#define GITS_CMD_CLEAR                 0x04
-#define GITS_CMD_SYNC                  0x05
-
-/*
- * GICv4 ITS specific commands
- */
-#define GITS_CMD_GICv4(x)              ((x) | 0x20)
-#define GITS_CMD_VINVALL               GITS_CMD_GICv4(GITS_CMD_INVALL)
-#define GITS_CMD_VMAPP                 GITS_CMD_GICv4(GITS_CMD_MAPC)
-#define GITS_CMD_VMAPTI                        GITS_CMD_GICv4(GITS_CMD_MAPTI)
-#define GITS_CMD_VMOVI                 GITS_CMD_GICv4(GITS_CMD_MOVI)
-#define GITS_CMD_VSYNC                 GITS_CMD_GICv4(GITS_CMD_SYNC)
-/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
-#define GITS_CMD_VMOVP                 GITS_CMD_GICv4(2)
-#define GITS_CMD_VSGI                  GITS_CMD_GICv4(3)
-#define GITS_CMD_INVDB                 GITS_CMD_GICv4(0xe)
-
-/*
- * ITS error numbers
- */
-#define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
-#define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
-#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
-#define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
-#define E_ITS_MAPD_DEVICE_OOR                  0x010801
-#define E_ITS_MAPD_ITTSIZE_OOR                 0x010802
-#define E_ITS_MAPC_PROCNUM_OOR                 0x010902
-#define E_ITS_MAPC_COLLECTION_OOR              0x010903
-#define E_ITS_MAPTI_UNMAPPED_DEVICE            0x010a04
-#define E_ITS_MAPTI_ID_OOR                     0x010a05
-#define E_ITS_MAPTI_PHYSICALID_OOR             0x010a06
-#define E_ITS_INV_UNMAPPED_INTERRUPT           0x010c07
-#define E_ITS_INVALL_UNMAPPED_COLLECTION       0x010d09
-#define E_ITS_MOVALL_PROCNUM_OOR               0x010e01
-#define E_ITS_DISCARD_UNMAPPED_INTERRUPT       0x010f07
-
-/*
- * CPU interface registers
- */
-#define ICC_CTLR_EL1_EOImode_SHIFT     (1)
-#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_drop      (1U << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_EOImode_MASK      (1 << ICC_CTLR_EL1_EOImode_SHIFT)
-#define ICC_CTLR_EL1_CBPR_SHIFT                0
-#define ICC_CTLR_EL1_CBPR_MASK         (1 << ICC_CTLR_EL1_CBPR_SHIFT)
-#define ICC_CTLR_EL1_PMHE_SHIFT                6
-#define ICC_CTLR_EL1_PMHE_MASK         (1 << ICC_CTLR_EL1_PMHE_SHIFT)
-#define ICC_CTLR_EL1_PRI_BITS_SHIFT    8
-#define ICC_CTLR_EL1_PRI_BITS_MASK     (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
-#define ICC_CTLR_EL1_ID_BITS_SHIFT     11
-#define ICC_CTLR_EL1_ID_BITS_MASK      (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
-#define ICC_CTLR_EL1_SEIS_SHIFT                14
-#define ICC_CTLR_EL1_SEIS_MASK         (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
-#define ICC_CTLR_EL1_A3V_SHIFT         15
-#define ICC_CTLR_EL1_A3V_MASK          (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
-#define ICC_CTLR_EL1_RSS               (0x1 << 18)
-#define ICC_CTLR_EL1_ExtRange          (0x1 << 19)
-#define ICC_PMR_EL1_SHIFT              0
-#define ICC_PMR_EL1_MASK               (0xff << ICC_PMR_EL1_SHIFT)
-#define ICC_BPR0_EL1_SHIFT             0
-#define ICC_BPR0_EL1_MASK              (0x7 << ICC_BPR0_EL1_SHIFT)
-#define ICC_BPR1_EL1_SHIFT             0
-#define ICC_BPR1_EL1_MASK              (0x7 << ICC_BPR1_EL1_SHIFT)
-#define ICC_IGRPEN0_EL1_SHIFT          0
-#define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
-#define ICC_IGRPEN1_EL1_SHIFT          0
-#define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
-#define ICC_SRE_EL1_DIB                        (1U << 2)
-#define ICC_SRE_EL1_DFB                        (1U << 1)
-#define ICC_SRE_EL1_SRE                        (1U << 0)
-
-/* These are for GICv2 emulation only */
-#define GICH_LR_VIRTUALID              (0x3ffUL << 0)
-#define GICH_LR_PHYSID_CPUID_SHIFT     (10)
-#define GICH_LR_PHYSID_CPUID           (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
-
-#define ICC_IAR1_EL1_SPURIOUS          0x3ff
-
-#define ICC_SRE_EL2_SRE                        (1 << 0)
-#define ICC_SRE_EL2_ENABLE             (1 << 3)
-
-#define ICC_SGI1R_TARGET_LIST_SHIFT    0
-#define ICC_SGI1R_TARGET_LIST_MASK     (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
-#define ICC_SGI1R_AFFINITY_1_SHIFT     16
-#define ICC_SGI1R_AFFINITY_1_MASK      (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
-#define ICC_SGI1R_SGI_ID_SHIFT         24
-#define ICC_SGI1R_SGI_ID_MASK          (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
-#define ICC_SGI1R_AFFINITY_2_SHIFT     32
-#define ICC_SGI1R_AFFINITY_2_MASK      (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
-#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
-#define ICC_SGI1R_RS_SHIFT             44
-#define ICC_SGI1R_RS_MASK              (0xfULL << ICC_SGI1R_RS_SHIFT)
-#define ICC_SGI1R_AFFINITY_3_SHIFT     48
-#define ICC_SGI1R_AFFINITY_3_MASK      (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
deleted file mode 100644
index 3722ed9..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __SELFTESTS_GIC_V3_ITS_H__
-#define __SELFTESTS_GIC_V3_ITS_H__
-
-#include <linux/sizes.h>
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
-             vm_paddr_t device_tbl, size_t device_tbl_sz,
-             vm_paddr_t cmdq, size_t cmdq_size);
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
-                      size_t itt_size, bool valid);
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
-                       u32 collection_id, u32 intid);
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
-
-#endif // __SELFTESTS_GIC_V3_ITS_H__
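
These helpers drive KVM's emulated ITS from guest code. As a rough usage sketch only (the header is merely moving in this patch): the collection/device/ITT tables and the command queue are assumed to have been allocated elsewhere by the test, with their guest physical addresses (coll_tbl_gpa, device_tbl_gpa, itt_gpa, cmdq_gpa) and the command queue's guest virtual mapping (cmdq_va) already known; all of those identifiers are placeholders, not names from this header.

        /* Guest code: wire one LPI from a device/event pair to vCPU 0. */
        its_init(coll_tbl_gpa, coll_tbl_sz, device_tbl_gpa, device_tbl_sz,
                 cmdq_gpa, cmdq_sz);

        its_send_mapd_cmd(cmdq_va, device_id, itt_gpa, itt_sz, true);
        its_send_mapc_cmd(cmdq_va, /*vcpu_id=*/0, collection_id, true);
        its_send_mapti_cmd(cmdq_va, device_id, event_id, collection_id, lpi_intid);
        its_send_invall_cmd(cmdq_va, collection_id);
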
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
deleted file mode 100644
index e43a57d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
deleted file mode 100644
index 1e8d0d5..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AArch64 processor specific defines
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/brk-imm.h>
-#include <asm/esr.h>
-#include <asm/sysreg.h>
-
-
-#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
-                          KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-/*
- * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
- * SYS_* register definitions in asm/sysreg.h to use in KVM
- * calls such as vcpu_get_reg() and vcpu_set_reg().
- */
-#define KVM_ARM64_SYS_REG(sys_reg_id)                  \
-       ARM64_SYS_REG(sys_reg_Op0(sys_reg_id),          \
-                       sys_reg_Op1(sys_reg_id),        \
-                       sys_reg_CRn(sys_reg_id),        \
-                       sys_reg_CRm(sys_reg_id),        \
-                       sys_reg_Op2(sys_reg_id))
-
-/*
- * Default MAIR
- *                  index   attribute
- * DEVICE_nGnRnE      0     0000:0000
- * DEVICE_nGnRE       1     0000:0100
- * DEVICE_GRE         2     0000:1100
- * NORMAL_NC          3     0100:0100
- * NORMAL             4     1111:1111
- * NORMAL_WT          5     1011:1011
- */
-
-/* Linux doesn't use these memory types, so let's define them. */
-#define MAIR_ATTR_DEVICE_GRE   UL(0x0c)
-#define MAIR_ATTR_NORMAL_WT    UL(0xbb)
-
-#define MT_DEVICE_nGnRnE       0
-#define MT_DEVICE_nGnRE                1
-#define MT_DEVICE_GRE          2
-#define MT_NORMAL_NC           3
-#define MT_NORMAL              4
-#define MT_NORMAL_WT           5
-
-#define DEFAULT_MAIR_EL1                                                       \
-       (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |              \
-        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |                \
-        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |                    \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |                      \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |                            \
-        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code);
-
-struct ex_regs {
-       u64 regs[31];
-       u64 sp;
-       u64 pc;
-       u64 pstate;
-};
-
-#define VECTOR_NUM     16
-
-enum {
-       VECTOR_SYNC_CURRENT_SP0,
-       VECTOR_IRQ_CURRENT_SP0,
-       VECTOR_FIQ_CURRENT_SP0,
-       VECTOR_ERROR_CURRENT_SP0,
-
-       VECTOR_SYNC_CURRENT,
-       VECTOR_IRQ_CURRENT,
-       VECTOR_FIQ_CURRENT,
-       VECTOR_ERROR_CURRENT,
-
-       VECTOR_SYNC_LOWER_64,
-       VECTOR_IRQ_LOWER_64,
-       VECTOR_FIQ_LOWER_64,
-       VECTOR_ERROR_LOWER_64,
-
-       VECTOR_SYNC_LOWER_32,
-       VECTOR_IRQ_LOWER_32,
-       VECTOR_FIQ_LOWER_32,
-       VECTOR_ERROR_LOWER_32,
-};
-
-#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
-                          (v) == VECTOR_SYNC_CURRENT     || \
-                          (v) == VECTOR_SYNC_LOWER_64    || \
-                          (v) == VECTOR_SYNC_LOWER_32)
-
-/* Access flag */
-#define PTE_AF                 (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA             (1ULL << 39)
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
-                                       uint32_t *ipa16k, uint32_t *ipa64k);
-
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
-
-typedef void(*handler_fn)(struct ex_regs *);
-void vm_install_exception_handler(struct kvm_vm *vm,
-               int vector, handler_fn handler);
-void vm_install_sync_handler(struct kvm_vm *vm,
-               int vector, int ec, handler_fn handler);
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
-
-static inline void cpu_relax(void)
-{
-       asm volatile("yield" ::: "memory");
-}
-
-#define isb()          asm volatile("isb" : : : "memory")
-#define dsb(opt)       asm volatile("dsb " #opt : : : "memory")
-#define dmb(opt)       asm volatile("dmb " #opt : : : "memory")
-
-#define dma_wmb()      dmb(oshst)
-#define __iowmb()      dma_wmb()
-
-#define dma_rmb()      dmb(oshld)
-
-#define __iormb(v)                                                     \
-({                                                                     \
-       unsigned long tmp;                                              \
-                                                                       \
-       dma_rmb();                                                      \
-                                                                       \
-       /*                                                              \
-        * Courtesy of arch/arm64/include/asm/io.h:                     \
-        * Create a dummy control dependency from the IO read to any    \
-        * later instructions. This ensures that a subsequent call      \
-        * to udelay() will be ordered due to the ISB in __delay().     \
-        */                                                             \
-       asm volatile("eor       %0, %1, %1\n"                           \
-                    "cbnz      %0, ."                                  \
-                    : "=r" (tmp) : "r" ((unsigned long)(v))            \
-                    : "memory");                                       \
-})
-
-static __always_inline void __raw_writel(u32 val, volatile void *addr)
-{
-       asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u32 __raw_readl(const volatile void *addr)
-{
-       u32 val;
-       asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
-       return val;
-}
-
-static __always_inline void __raw_writeq(u64 val, volatile void *addr)
-{
-       asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
-}
-
-static __always_inline u64 __raw_readq(const volatile void *addr)
-{
-       u64 val;
-       asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
-       return val;
-}
-
-#define writel_relaxed(v,c)    ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define readl_relaxed(c)       ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-#define writeq_relaxed(v,c)    ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-#define readq_relaxed(c)       ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-
-#define writel(v,c)            ({ __iowmb(); writel_relaxed((v),(c));})
-#define readl(c)               ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
-#define writeq(v,c)            ({ __iowmb(); writeq_relaxed((v),(c));})
-#define readq(c)               ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
-
-
-static inline void local_irq_enable(void)
-{
-       asm volatile("msr daifclr, #3" : : : "memory");
-}
-
-static inline void local_irq_disable(void)
-{
-       asm volatile("msr daifset, #3" : : : "memory");
-}
-
-/**
- * struct arm_smccc_res - Result from SMC/HVC call
- * @a0-a3 result values from registers 0 to 3
- */
-struct arm_smccc_res {
-       unsigned long a0;
-       unsigned long a1;
-       unsigned long a2;
-       unsigned long a3;
-};
-
-/**
- * smccc_hvc - Invoke a SMCCC function using the hvc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res);
-
-/**
- * smccc_smc - Invoke a SMCCC function using the smc conduit
- * @function_id: the SMCCC function to be called
- * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
- * @res: pointer to write the return values from registers x0-x3
- *
- */
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res);
-
-/* Execute a Wait For Interrupt instruction. */
-void wfi(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
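
Beyond the raw MMIO accessors, the exception-table and SMCCC helpers declared above are what most arm64 selftests build on. A minimal sketch, with the VM/vCPU creation and the guest_irq_handler body assumed to exist elsewhere; GUEST_ASSERT() comes from ucall_common.h (included above), and the PSCI_* constants come from <linux/psci.h>, not from this header.

        /* Host code: route guest exceptions through per-test handlers. */
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vcpu);
        vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

        /* Guest code: query the PSCI version over the HVC conduit. */
        struct arm_smccc_res res;

        smccc_hvc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
        GUEST_ASSERT(res.a0 != (unsigned long)PSCI_RET_NOT_SUPPORTED);
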
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
deleted file mode 100644
index cf09841..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
-#define SELFTEST_KVM_ARM64_SPINLOCK_H
-
-struct spinlock {
-       int v;
-};
-
-extern void spin_lock(struct spinlock *lock);
-extern void spin_unlock(struct spinlock *lock);
-
-#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
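
Usage is the obvious one: a statically allocated lock serializing guest code that runs on multiple vCPUs, e.g.

        static struct spinlock shared_lock;

        /* Guest code, potentially running on several vCPUs at once. */
        spin_lock(&shared_lock);
        /* ... touch state shared between vCPUs ... */
        spin_unlock(&shared_lock);
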
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
deleted file mode 100644
index 4ec801f..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
-
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-extern vm_vaddr_t *ucall_exit_mmio_addr;
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
deleted file mode 100644
index c481d0c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) host specific defines
- */
-
-#ifndef SELFTEST_KVM_VGIC_H
-#define SELFTEST_KVM_VGIC_H
-
-#include <linux/kvm.h>
-
-#include "kvm_util.h"
-
-#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
-       (((uint64_t)(count) << 52) | \
-       ((uint64_t)((base) >> 16) << 16) | \
-       ((uint64_t)(flags) << 12) | \
-       index)
-
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
-
-#define VGIC_MAX_RESERVED      1023
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-
-/* The vcpu arg only applies to private interrupts. */
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-
-#define KVM_IRQCHIP_NUM_PINS   (1020 - 32)
-
-int vgic_its_setup(struct kvm_vm *vm);
-
-#endif // SELFTEST_KVM_VGIC_H
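
These are the host-side counterparts to the guest GIC library: they create the in-kernel vGIC device and inject interrupts from the VMM thread. A minimal sketch, assuming the VM and its vCPUs already exist (vm and nr_vcpus are placeholders) and using TEST_ASSERT() from the common selftest harness rather than anything declared here.

        /* Host code: create a vGICv3 with 64 IRQs, then pulse SPI 42. */
        int gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);

        TEST_ASSERT(gic_fd >= 0, "Failed to create a vGICv3 device");

        kvm_arm_irq_line(vm, 42, 1);    /* assert the line */
        kvm_arm_irq_line(vm, 42, 0);    /* deassert the line */
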
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
new file mode 100644
index 0000000..bf461de
--- /dev/null
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+       VIRTUAL,
+       PHYSICAL,
+};
+
+#define CTL_ENABLE     (1 << 0)
+#define CTL_IMASK      (1 << 1)
+#define CTL_ISTATUS    (1 << 2)
+
+#define msec_to_cycles(msec)   \
+       (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)   \
+       (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+       ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+       return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+       isb();
+
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntvct_el0);
+       case PHYSICAL:
+               return read_sysreg(cntpct_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(cval, cntv_cval_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(cval, cntp_cval_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_cval_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_cval_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(tval, cntv_tval_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(tval, cntp_tval_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+       isb();
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_tval_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_tval_el0);
+       default:
+               GUEST_FAIL("Could not get timer %d\n", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+       switch (timer) {
+       case VIRTUAL:
+               write_sysreg(ctl, cntv_ctl_el0);
+               break;
+       case PHYSICAL:
+               write_sysreg(ctl, cntp_ctl_el0);
+               break;
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+       switch (timer) {
+       case VIRTUAL:
+               return read_sysreg(cntv_ctl_el0);
+       case PHYSICAL:
+               return read_sysreg(cntp_ctl_el0);
+       default:
+               GUEST_FAIL("Unexpected timer type = %u", timer);
+       }
+
+       /* We should not reach here */
+       return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+       uint64_t now_ct = timer_get_cntct(timer);
+       uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+       timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+       timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
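
Taken together, the helpers above give guest code everything needed to arm a timer and wait for it to fire. A minimal guest-side sketch (interrupt delivery and handler wiring are set up elsewhere; this variant simply polls ISTATUS):

        static void guest_wait_for_vtimer(void)
        {
                /* Fire the virtual timer roughly 10ms from now. */
                timer_set_next_cval_ms(VIRTUAL, 10);

                /* Enable the timer and leave its interrupt unmasked. */
                timer_set_ctl(VIRTUAL, CTL_ENABLE);

                /* Poll until the timer condition is met. */
                while (!(timer_get_ctl(VIRTUAL) & CTL_ISTATUS))
                        cpu_relax();
        }
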
diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
new file mode 100644
index 0000000..329e4f5
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+       enum arch_timer timer = VIRTUAL;
+       uint64_t start = timer_get_cntct(timer);
+
+       while ((timer_get_cntct(timer) - start) < cycles)
+               cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+       __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
new file mode 100644
index 0000000..baeb3c8
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+#include <asm/kvm.h>
+
+enum gic_type {
+       GIC_V3,
+       GIC_TYPE_MAX,
+};
+
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA          0x8000000ULL
+#define GICD_BASE_GPA          (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA          (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA          ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA          ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA          ((volatile void *)GICR_BASE_GPA)
+
+#define MIN_SGI                        0
+#define MIN_PPI                        16
+#define MIN_SPI                        32
+#define MAX_SPI                        1019
+#define IAR_SPURIOUS           1023
+
+#define INTID_IS_SGI(intid)    (0       <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid)    (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid)    (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
+void gic_init(enum gic_type type, unsigned int nr_cpus);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+                          vm_paddr_t pend_table);
+
+#endif /* SELFTEST_KVM_GIC_H */
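
For context, the usual guest-side flow built on these declarations is: initialize the vGIC for the number of vCPUs in the VM, enable the interrupt of interest, and acknowledge/EOI it from the IRQ handler. A sketch follows; struct ex_regs and local_irq_enable() come from processor.h, and registering the handler (vm_install_exception_handler()) plus creating the vGIC device on the host side are assumed to happen elsewhere.

        static void guest_irq_handler(struct ex_regs *regs)
        {
                unsigned int intid = gic_get_and_ack_irq();

                if (intid == IAR_SPURIOUS)
                        return;

                /* ... per-test handling of @intid ... */

                gic_set_eoi(intid);
        }

        static void guest_setup_gic(unsigned int nr_vcpus, unsigned int intid)
        {
                gic_init(GIC_V3, nr_vcpus);
                gic_irq_enable(intid);
                local_irq_enable();
        }
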
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
new file mode 100644
index 0000000..a76615f
--- /dev/null
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
+
+/*
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
+ */
+#define GICD_CTLR                      0x0000
+#define GICD_TYPER                     0x0004
+#define GICD_IIDR                      0x0008
+#define GICD_TYPER2                    0x000C
+#define GICD_STATUSR                   0x0010
+#define GICD_SETSPI_NSR                        0x0040
+#define GICD_CLRSPI_NSR                        0x0048
+#define GICD_SETSPI_SR                 0x0050
+#define GICD_CLRSPI_SR                 0x0058
+#define GICD_IGROUPR                   0x0080
+#define GICD_ISENABLER                 0x0100
+#define GICD_ICENABLER                 0x0180
+#define GICD_ISPENDR                   0x0200
+#define GICD_ICPENDR                   0x0280
+#define GICD_ISACTIVER                 0x0300
+#define GICD_ICACTIVER                 0x0380
+#define GICD_IPRIORITYR                        0x0400
+#define GICD_ICFGR                     0x0C00
+#define GICD_IGRPMODR                  0x0D00
+#define GICD_NSACR                     0x0E00
+#define GICD_IGROUPRnE                 0x1000
+#define GICD_ISENABLERnE               0x1200
+#define GICD_ICENABLERnE               0x1400
+#define GICD_ISPENDRnE                 0x1600
+#define GICD_ICPENDRnE                 0x1800
+#define GICD_ISACTIVERnE               0x1A00
+#define GICD_ICACTIVERnE               0x1C00
+#define GICD_IPRIORITYRnE              0x2000
+#define GICD_ICFGRnE                   0x3000
+#define GICD_IROUTER                   0x6000
+#define GICD_IROUTERnE                 0x8000
+#define GICD_IDREGS                    0xFFD0
+#define GICD_PIDR2                     0xFFE8
+
+#define ESPI_BASE_INTID                        4096
+
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR                 0x0800
+#define GICD_SGIR                      0x0F00
+#define GICD_CPENDSGIR                 0x0F10
+#define GICD_SPENDSGIR                 0x0F20
+
+#define GICD_CTLR_RWP                  (1U << 31)
+#define GICD_CTLR_nASSGIreq            (1U << 8)
+#define GICD_CTLR_DS                   (1U << 6)
+#define GICD_CTLR_ARE_NS               (1U << 4)
+#define GICD_CTLR_ENABLE_G1A           (1U << 1)
+#define GICD_CTLR_ENABLE_G1            (1U << 0)
+
+#define GICD_IIDR_IMPLEMENTER_SHIFT    0
+#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT       12
+#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT                16
+#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT     24
+#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1         (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0         (1U << 0)
+
+#define GICD_TYPER_RSS                 (1U << 26)
+#define GICD_TYPER_LPIS                        (1U << 17)
+#define GICD_TYPER_MBIS                        (1U << 16)
+#define GICD_TYPER_ESPI                        (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer)      ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer)     ((((typer) >> 11) & 0x1f) + 1)
+#define GICD_TYPER_SPIS(typer)         ((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_ESPIS(typer)                                                \
+       (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap          (1U << 8)
+#define GICD_TYPER2_VIL                        (1U << 7)
+#define GICD_TYPER2_VID                        GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE      (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY      (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK            0xf0
+#define GIC_PIDR2_ARCH_GICv3           0x30
+#define GIC_PIDR2_ARCH_GICv4           0x40
+
+#define GIC_V3_DIST_SIZE               0x10000
+
+#define GIC_PAGE_SIZE_4K               0ULL
+#define GIC_PAGE_SIZE_16K              1ULL
+#define GIC_PAGE_SIZE_64K              2ULL
+#define GIC_PAGE_SIZE_MASK             3ULL
+
+/*
+ * Re-Distributor registers, offsets from RD_base
+ */
+#define GICR_CTLR                      GICD_CTLR
+#define GICR_IIDR                      0x0004
+#define GICR_TYPER                     0x0008
+#define GICR_STATUSR                   GICD_STATUSR
+#define GICR_WAKER                     0x0014
+#define GICR_SETLPIR                   0x0040
+#define GICR_CLRLPIR                   0x0048
+#define GICR_PROPBASER                 0x0070
+#define GICR_PENDBASER                 0x0078
+#define GICR_INVLPIR                   0x00A0
+#define GICR_INVALLR                   0x00B0
+#define GICR_SYNCR                     0x00C0
+#define GICR_IDREGS                    GICD_IDREGS
+#define GICR_PIDR2                     GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS          (1UL << 0)
+#define GICR_CTLR_CES                  (1UL << 1)
+#define GICR_CTLR_IR                   (1UL << 2)
+#define GICR_CTLR_RWP                  (1UL << 3)
+
+#define GICR_TYPER_CPU_NUMBER(r)       (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID                        1056
+
+#define GICR_TYPER_NR_PPIS(r)                                          \
+       ({                                                              \
+               unsigned int __ppinum = ((r) >> 27) & 0x1f;             \
+               unsigned int __nr_ppis = 16;                            \
+               if (__ppinum == 1 || __ppinum == 2)                     \
+                       __nr_ppis +=  __ppinum * 32;                    \
+                                                                       \
+               __nr_ppis;                                              \
+        })
+
+#define GICR_WAKER_ProcessorSleep      (1U << 1)
+#define GICR_WAKER_ChildrenAsleep      (1U << 2)
+
+#define GIC_BASER_CACHE_nCnB           0ULL
+#define GIC_BASER_CACHE_SameAsInner    0ULL
+#define GIC_BASER_CACHE_nC             1ULL
+#define GIC_BASER_CACHE_RaWt           2ULL
+#define GIC_BASER_CACHE_RaWb           3ULL
+#define GIC_BASER_CACHE_WaWt           4ULL
+#define GIC_BASER_CACHE_WaWb           5ULL
+#define GIC_BASER_CACHE_RaWaWt         6ULL
+#define GIC_BASER_CACHE_RaWaWb         7ULL
+#define GIC_BASER_CACHE_MASK           7ULL
+#define GIC_BASER_NonShareable         0ULL
+#define GIC_BASER_InnerShareable       1ULL
+#define GIC_BASER_OuterShareable       2ULL
+#define GIC_BASER_SHAREABILITY_MASK    3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type)                 \
+       (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type)                              \
+       (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT              (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT                (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT                (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK                               \
+       GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable                                  \
+       GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC      GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK                     (0x1f)
+#define GICR_PROPBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x)      ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT              (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT                (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT                (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK                               \
+       GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK                         \
+       GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable                                  \
+       GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC      GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb    GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb  GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ                             BIT_ULL(62)
+
+/*
+ * Re-Distributor registers, offsets from SGI_base
+ */
+#define GICR_IGROUPR0                  GICD_IGROUPR
+#define GICR_ISENABLER0                        GICD_ISENABLER
+#define GICR_ICENABLER0                        GICD_ICENABLER
+#define GICR_ISPENDR0                  GICD_ISPENDR
+#define GICR_ICPENDR0                  GICD_ICPENDR
+#define GICR_ISACTIVER0                        GICD_ISACTIVER
+#define GICR_ICACTIVER0                        GICD_ICACTIVER
+#define GICR_IPRIORITYR0               GICD_IPRIORITYR
+#define GICR_ICFGR0                    GICD_ICFGR
+#define GICR_IGRPMODR0                 GICD_IGRPMODR
+#define GICR_NSACR                     GICD_NSACR
+
+#define GICR_TYPER_PLPIS               (1U << 0)
+#define GICR_TYPER_VLPIS               (1U << 1)
+#define GICR_TYPER_DIRTY               (1U << 2)
+#define GICR_TYPER_DirectLPIS          (1U << 3)
+#define GICR_TYPER_LAST                        (1U << 4)
+#define GICR_TYPER_RVPEID              (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF      GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY            GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID             GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID             GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V                 GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID             GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V                 GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE             0x20000
+
+#define LPI_PROP_GROUP1                        (1 << 1)
+#define LPI_PROP_ENABLED               (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER                        0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK    0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT             (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT       (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT       (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK                              \
+       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK                              \
+       GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable                                 \
+       GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite either of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID      (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT   (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE  GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z          (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR       GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE       GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER                        0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT             (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT       (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT       (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK                              \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK                                \
+       GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK                              \
+       GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable                                   \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable                                 \
+       GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC     GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb   GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty          (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast    (1ULL << 61)
+#define GICR_VPENDBASER_IDAI           (1ULL << 62)
+#define GICR_VPENDBASER_Valid          (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB         (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN    (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN    (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID      GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR                     0x0080
+
+#define GICR_VSGIR_VPEID               GENMASK(15, 0)
+
+#define GICR_VSGIPENDR                 0x0088
+
+#define GICR_VSGIPENDR_BUSY            (1U << 31)
+#define GICR_VSGIPENDR_PENDING         GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR                      0x0000
+#define GITS_IIDR                      0x0004
+#define GITS_TYPER                     0x0008
+#define GITS_MPIDR                     0x0018
+#define GITS_CBASER                    0x0080
+#define GITS_CWRITER                   0x0088
+#define GITS_CREADR                    0x0090
+#define GITS_BASER                     0x0100
+#define GITS_IDREGS_BASE               0xffd0
+#define GITS_PIDR0                     0xffe0
+#define GITS_PIDR1                     0xffe4
+#define GITS_PIDR2                     GICR_PIDR2
+#define GITS_PIDR4                     0xffd0
+#define GITS_CIDR0                     0xfff0
+#define GITS_CIDR1                     0xfff4
+#define GITS_CIDR2                     0xfff8
+#define GITS_CIDR3                     0xfffc
+
+#define GITS_TRANSLATER                        0x10040
+
+#define GITS_SGIR                      0x20020
+
+#define GITS_SGIR_VPEID                        GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID               GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE               (1U << 0)
+#define GITS_CTLR_ImDe                 (1U << 1)
+#define        GITS_CTLR_ITS_NUMBER_SHIFT      4
+#define        GITS_CTLR_ITS_NUMBER            (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT            (1U << 31)
+
+#define GITS_TYPER_PLPIS               (1UL << 0)
+#define GITS_TYPER_VLPIS               (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT        4
+#define GITS_TYPER_ITT_ENTRY_SIZE      GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT                8
+#define GITS_TYPER_DEVBITS_SHIFT       13
+#define GITS_TYPER_DEVBITS             GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA                 (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT           24
+#define GITS_TYPER_HCC(r)              (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP               (1ULL << 37)
+#define GITS_TYPER_VMAPP               (1ULL << 40)
+#define GITS_TYPER_SVPET               GENMASK_ULL(42, 41)
+
+#define GITS_IIDR_REV_SHIFT            12
+#define GITS_IIDR_REV_MASK             (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r)               (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT      24
+
+#define GITS_CBASER_VALID                      (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT         (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT   (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT   (53)
+#define GITS_CBASER_SHAREABILITY_MASK                                  \
+       GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK                            \
+       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK                            \
+       GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
+
+#define GITS_CBASER_InnerShareable                                     \
+       GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+
+#define GITS_CBASER_nCnB       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC         GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb       GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb     GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser)    ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS             8
+
+#define GITS_BASER_VALID                       (1ULL << 63)
+#define GITS_BASER_INDIRECT                    (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT    (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT    (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK                             \
+       GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK           GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK                             \
+       GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK                                   \
+       GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC          GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb                GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb      GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT                  (56)
+#define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT            (48)
+#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK     GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys)                                 \
+       (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser)                                        \
+       (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT  (10)
+#define GITS_BASER_InnerShareable                                      \
+       GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT     (8)
+#define __GITS_BASER_PSZ(sz)           (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K                __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K       __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K       __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK      __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX           256
+#define GITS_BASER_PAGES_SHIFT         (0)
+#define GITS_BASER_NR_PAGES(r)         (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE           0
+#define GITS_BASER_TYPE_DEVICE         1
+#define GITS_BASER_TYPE_VCPU           2
+#define GITS_BASER_TYPE_RESERVED3      3
+#define GITS_BASER_TYPE_COLLECTION     4
+#define GITS_BASER_TYPE_RESERVED5      5
+#define GITS_BASER_TYPE_RESERVED6      6
+#define GITS_BASER_TYPE_RESERVED7      7
+
+#define GITS_LVL1_ENTRY_SIZE           (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD                  0x08
+#define GITS_CMD_MAPC                  0x09
+#define GITS_CMD_MAPTI                 0x0a
+#define GITS_CMD_MAPI                  0x0b
+#define GITS_CMD_MOVI                  0x01
+#define GITS_CMD_DISCARD               0x0f
+#define GITS_CMD_INV                   0x0c
+#define GITS_CMD_MOVALL                        0x0e
+#define GITS_CMD_INVALL                        0x0d
+#define GITS_CMD_INT                   0x03
+#define GITS_CMD_CLEAR                 0x04
+#define GITS_CMD_SYNC                  0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x)              ((x) | 0x20)
+#define GITS_CMD_VINVALL               GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP                 GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI                        GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI                 GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC                 GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they don't have a physical counterpart */
+#define GITS_CMD_VMOVP                 GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI                  GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB                 GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
+#define E_ITS_MAPD_DEVICE_OOR                  0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR                 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR                 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR              0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE            0x010a04
+#define E_ITS_MAPTI_ID_OOR                     0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR             0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT           0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION       0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR               0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT       0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT     (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir  (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop      (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK      (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT                0
+#define ICC_CTLR_EL1_CBPR_MASK         (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT                6
+#define ICC_CTLR_EL1_PMHE_MASK         (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT    8
+#define ICC_CTLR_EL1_PRI_BITS_MASK     (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT     11
+#define ICC_CTLR_EL1_ID_BITS_MASK      (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT                14
+#define ICC_CTLR_EL1_SEIS_MASK         (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT         15
+#define ICC_CTLR_EL1_A3V_MASK          (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS               (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange          (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT              0
+#define ICC_PMR_EL1_MASK               (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT             0
+#define ICC_BPR0_EL1_MASK              (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT             0
+#define ICC_BPR1_EL1_MASK              (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT          0
+#define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT          0
+#define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB                        (1U << 2)
+#define ICC_SRE_EL1_DFB                        (1U << 1)
+#define ICC_SRE_EL1_SRE                        (1U << 0)
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID              (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT     (10)
+#define GICH_LR_PHYSID_CPUID           (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS          0x3ff
+
+#define ICC_SRE_EL2_SRE                        (1 << 0)
+#define ICC_SRE_EL2_ENABLE             (1 << 3)
+
+#define ICC_SGI1R_TARGET_LIST_SHIFT    0
+#define ICC_SGI1R_TARGET_LIST_MASK     (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT     16
+#define ICC_SGI1R_AFFINITY_1_MASK      (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT         24
+#define ICC_SGI1R_SGI_ID_MASK          (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT     32
+#define ICC_SGI1R_AFFINITY_2_MASK      (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT             44
+#define ICC_SGI1R_RS_MASK              (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT     48
+#define ICC_SGI1R_AFFINITY_3_MASK      (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
+
+#endif
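
The GIC_BASER_SHAREABILITY()/GIC_BASER_CACHEABILITY() helpers above build register-specific attribute fields by token-pasting the register name onto its *_SHIFT constants. As an illustrative sketch only (the command-queue address below is a placeholder, not a value from this patch), a GITS_CBASER word could be composed as:

/*
 * Illustrative sketch: compose a GITS_CBASER value from the helpers above.
 * "cmdq_pa" stands in for the guest physical address of a one-page ITS
 * command queue; it is an assumption, not part of this header.
 */
static inline uint64_t example_cbaser(uint64_t cmdq_pa)
{
	return GITS_CBASER_VALID |
	       GITS_CBASER_InnerShareable |		/* shareability field      */
	       GITS_CBASER_RaWaWb |			/* inner cacheability      */
	       GIC_ENCODE_SZ(1, 8) |			/* Size = nr 4K pages - 1  */
	       (cmdq_pa & GENMASK_ULL(51, 12));		/* 4K-aligned base address */
}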
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
new file mode 100644 (file)
index 0000000..3722ed9
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+             vm_paddr_t device_tbl, size_t device_tbl_sz,
+             vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+                      size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+                       u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
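
For orientation, a sketch of the call order a test might follow with these helpers: initialize the ITS tables, map a device and a collection, bind an event to an LPI, then invalidate. All IDs, sizes and addresses below are placeholders, not values taken from this patch.

/*
 * Illustrative call order only; coll_tbl/dev_tbl/cmdq/itt are assumed
 * guest physical allocations and cmdq_va the host mapping of the queue.
 */
static inline void example_its_bringup(vm_paddr_t coll_tbl, vm_paddr_t dev_tbl,
				       vm_paddr_t cmdq, void *cmdq_va,
				       vm_paddr_t itt)
{
	its_init(coll_tbl, SZ_64K, dev_tbl, SZ_64K, cmdq, SZ_64K);

	its_send_mapd_cmd(cmdq_va, 0 /* device_id */, itt, SZ_64K, true);
	its_send_mapc_cmd(cmdq_va, 0 /* vcpu_id */, 0 /* collection_id */, true);
	its_send_mapti_cmd(cmdq_va, 0 /* device_id */, 0 /* event_id */,
			   0 /* collection_id */, 8192 /* example LPI INTID */);
	its_send_invall_cmd(cmdq_va, 0 /* collection_id */);
}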
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
new file mode 100644 (file)
index 0000000..1e8d0d5
--- /dev/null
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/brk-imm.h>
+#include <asm/esr.h>
+#include <asm/sysreg.h>
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+                          KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id)                  \
+       ARM64_SYS_REG(sys_reg_Op0(sys_reg_id),          \
+                       sys_reg_Op1(sys_reg_id),        \
+                       sys_reg_CRn(sys_reg_id),        \
+                       sys_reg_CRm(sys_reg_id),        \
+                       sys_reg_Op2(sys_reg_id))
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE   UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT    UL(0xbb)
+
+#define MT_DEVICE_nGnRnE       0
+#define MT_DEVICE_nGnRE                1
+#define MT_DEVICE_GRE          2
+#define MT_NORMAL_NC           3
+#define MT_NORMAL              4
+#define MT_NORMAL_WT           5
+
+#define DEFAULT_MAIR_EL1                                                       \
+       (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |              \
+        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |                \
+        MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |                    \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |                      \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |                            \
+        MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code);
+
+struct ex_regs {
+       u64 regs[31];
+       u64 sp;
+       u64 pc;
+       u64 pstate;
+};
+
+#define VECTOR_NUM     16
+
+enum {
+       VECTOR_SYNC_CURRENT_SP0,
+       VECTOR_IRQ_CURRENT_SP0,
+       VECTOR_FIQ_CURRENT_SP0,
+       VECTOR_ERROR_CURRENT_SP0,
+
+       VECTOR_SYNC_CURRENT,
+       VECTOR_IRQ_CURRENT,
+       VECTOR_FIQ_CURRENT,
+       VECTOR_ERROR_CURRENT,
+
+       VECTOR_SYNC_LOWER_64,
+       VECTOR_IRQ_LOWER_64,
+       VECTOR_FIQ_LOWER_64,
+       VECTOR_ERROR_LOWER_64,
+
+       VECTOR_SYNC_LOWER_32,
+       VECTOR_IRQ_LOWER_32,
+       VECTOR_FIQ_LOWER_32,
+       VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+                          (v) == VECTOR_SYNC_CURRENT     || \
+                          (v) == VECTOR_SYNC_LOWER_64    || \
+                          (v) == VECTOR_SYNC_LOWER_32)
+
+/* Access flag */
+#define PTE_AF                 (1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA             (1ULL << 39)
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+                                       uint32_t *ipa16k, uint32_t *ipa64k);
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+               int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+               int vector, int ec, handler_fn handler);
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline void cpu_relax(void)
+{
+       asm volatile("yield" ::: "memory");
+}
+
+#define isb()          asm volatile("isb" : : : "memory")
+#define dsb(opt)       asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt)       asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb()      dmb(oshst)
+#define __iowmb()      dma_wmb()
+
+#define dma_rmb()      dmb(oshld)
+
+#define __iormb(v)                                                     \
+({                                                                     \
+       unsigned long tmp;                                              \
+                                                                       \
+       dma_rmb();                                                      \
+                                                                       \
+       /*                                                              \
+        * Courtesy of arch/arm64/include/asm/io.h:                     \
+        * Create a dummy control dependency from the IO read to any    \
+        * later instructions. This ensures that a subsequent call      \
+        * to udelay() will be ordered due to the ISB in __delay().     \
+        */                                                             \
+       asm volatile("eor       %0, %1, %1\n"                           \
+                    "cbnz      %0, ."                                  \
+                    : "=r" (tmp) : "r" ((unsigned long)(v))            \
+                    : "memory");                                       \
+})
+
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+       asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+       u32 val;
+       asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+       return val;
+}
+
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+       asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+       u64 val;
+       asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+       return val;
+}
+
+#define writel_relaxed(v,c)    ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c)       ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c)    ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c)       ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+
+#define writel(v,c)            ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c)               ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c)            ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c)               ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
+
+static inline void local_irq_enable(void)
+{
+       asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+       asm volatile("msr daifset, #3" : : : "memory");
+}
+
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+       unsigned long a0;
+       unsigned long a1;
+       unsigned long a2;
+       unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res);
+
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res);
+
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
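
As a guest-side usage sketch of smccc_hvc(), the call below queries the PSCI version; the function ID 0x84000000 (PSCI_0_2_FN_PSCI_VERSION) is assumed here for illustration and is not defined by this header.

/*
 * Guest-side sketch: query PSCI_VERSION over the HVC conduit.  The return
 * value encodes (major << 16) | minor in a0.
 */
static inline unsigned long example_psci_version(void)
{
	struct arm_smccc_res res;

	smccc_hvc(0x84000000, 0, 0, 0, 0, 0, 0, 0, &res);
	return res.a0;
}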
diff --git a/tools/testing/selftests/kvm/include/arm64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h
new file mode 100644 (file)
index 0000000..cf09841
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+       int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
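
A minimal usage sketch, assuming the lock and counter live in guest memory shared by all vCPUs:

/* Usage sketch: serialize updates to a counter shared between vCPUs. */
static struct spinlock counter_lock;
static int shared_counter;

static void bump_counter(void)
{
	spin_lock(&counter_lock);
	shared_counter++;
	spin_unlock(&counter_lock);
}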
diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
new file mode 100644 (file)
index 0000000..4ec801f
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM); it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
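
On the host side, the guest's store to the MMIO address surfaces as a KVM_EXIT_MMIO exit that the common ucall code decodes. A sketch of the usual run loop, assuming the generic selftests helpers (vcpu_run(), get_ucall(), TEST_ASSERT_EQ()) are available:

/* Host-side sketch: run the vCPU until the guest reports UCALL_DONE. */
static inline void example_run_until_done(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	for (;;) {
		vcpu_run(vcpu);
		TEST_ASSERT_EQ(vcpu->run->exit_reason, UCALL_EXIT_REASON);

		if (get_ucall(vcpu, &uc) == UCALL_DONE)
			break;
	}
}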
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
new file mode 100644 (file)
index 0000000..c481d0c
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+       (((uint64_t)(count) << 52) | \
+       ((uint64_t)((base) >> 16) << 16) | \
+       ((uint64_t)(flags) << 12) | \
+       index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+
+#define VGIC_MAX_RESERVED      1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS   (1020 - 32)
+
+int vgic_its_setup(struct kvm_vm *vm);
+
+#endif // SELFTEST_KVM_VGIC_H
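
REDIST_REGION_ATTR_ADDR() packs the count/base/flags/index tuple consumed by the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION device attribute. An illustrative encoding, with an assumed 64K-aligned guest physical base address:

/*
 * Illustrative only: encode one redistributor region for "nr_vcpus" vCPUs.
 * The base address is a placeholder, not a value from this patch.
 */
static inline uint64_t example_redist_region(uint32_t nr_vcpus)
{
	const uint64_t base = 0x80000000;	/* assumed GPA, 64K aligned */

	return REDIST_REGION_ATTR_ADDR(nr_vcpus, base, 0 /* flags */, 0 /* index */);
}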
diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h
new file mode 100644 (file)
index 0000000..1bf2756
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Definition for kernel virtual machines on s390x
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_DEBUG_PRINT_H
+#define SELFTEST_KVM_DEBUG_PRINT_H
+
+#include "asm/ptrace.h"
+#include "kvm_util.h"
+#include "sie.h"
+
+static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
+{
+       u64 pos;
+
+       pr_debug("%s (%p)\n", name, (void *)addr);
+       pr_debug("            0/0x00---------|");
+       if (len > 8)
+               pr_debug(" 8/0x08---------|");
+       if (len > 16)
+               pr_debug(" 16/0x10--------|");
+       if (len > 24)
+               pr_debug(" 24/0x18--------|");
+       for (pos = 0; pos < len; pos += 8) {
+               if ((pos % 32) == 0)
+                       pr_debug("\n %3lu 0x%.3lx ", pos, pos);
+               pr_debug(" %16lx", *((u64 *)(addr + pos)));
+       }
+       pr_debug("\n");
+}
+
+static inline void print_hex(const char *name, u64 addr)
+{
+       print_hex_bytes(name, addr, 512);
+}
+
+static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+       pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
+                run->flags,
+                run->psw_mask, run->psw_addr,
+                run->exit_reason, exit_reason_str(run->exit_reason));
+       pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
+                sie_block->psw_mask, sie_block->psw_addr);
+}
+
+static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
+{
+       print_hex_bytes("run", (u64)run, 0x150);
+       print_hex("sie_block", (u64)sie_block);
+       print_psw(run, sie_block);
+}
+
+static inline void print_regs(struct kvm_run *run)
+{
+       struct kvm_sync_regs *sync_regs = &run->s.regs;
+
+       print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
+       print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
+       print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
+}
+
+#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
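
These helpers are intended for dumping state when a vCPU exits unexpectedly; a minimal sketch (obtaining the SIE block is test-specific and omitted here):

/* Usage sketch: dump the sync regs on an unexpected exit reason. */
static inline void example_dump_on_exit(struct kvm_vcpu *vcpu)
{
	if (vcpu->run->exit_reason != KVM_EXIT_S390_SIEIC)
		print_regs(vcpu->run);
}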
diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
new file mode 100644 (file)
index 0000000..b0ed713
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
new file mode 100644 (file)
index 0000000..00a1ced
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Get the facility bits with the STFLE instruction
+ */
+
+#ifndef SELFTEST_KVM_FACILITY_H
+#define SELFTEST_KVM_FACILITY_H
+
+#include <linux/bitops.h>
+
+/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
+#define NB_STFL_DOUBLEWORDS 32
+
+extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+extern bool stfle_flag;
+
+static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
+{
+       return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
+{
+       register unsigned long r0 asm("0") = nb_doublewords - 1;
+
+       asm volatile("  .insn   s,0xb2b00000,0(%1)\n"
+                       : "+d" (r0)
+                       : "a" (fac)
+                       : "memory", "cc");
+}
+
+static inline void setup_facilities(void)
+{
+       stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
+       stfle_flag = true;
+}
+
+static inline bool test_facility(int nr)
+{
+       if (!stfle_flag)
+               setup_facilities();
+       return test_bit_inv(nr, stfl_doublewords);
+}
+
+#endif
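
STFLE numbers facility bits most-significant-bit first within each doubleword, which is why test_bit_inv() flips the bit number before calling the generic test_bit(). A usage sketch for gating a test on a facility follows; facility 8 (enhanced-DAT 1) is used purely as an example number.

/*
 * Usage sketch: skip a test when a facility is absent.  Assumes the common
 * selftests TEST_REQUIRE() helper; the facility number is illustrative.
 */
static inline void example_require_edat1(void)
{
	TEST_REQUIRE(test_facility(8));
}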
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h
new file mode 100644 (file)
index 0000000..33fef6f
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <linux/compiler.h>
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN    ~0xfffUL /* region/segment table origin    */
+#define REGION_ENTRY_PROTECT   0x200    /* region protection bit          */
+#define REGION_ENTRY_NOEXEC    0x100    /* region no-execute bit          */
+#define REGION_ENTRY_OFFSET    0xc0     /* region table offset            */
+#define REGION_ENTRY_INVALID   0x20     /* invalid region table entry     */
+#define REGION_ENTRY_TYPE      0x0c     /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH    0x03     /* region third length            */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID   0x400           /* HW invalid bit    */
+#define PAGE_PROTECT   0x200           /* HW read-only bit  */
+#define PAGE_NOEXEC    0x100           /* HW no-execute bit */
+
+/* Page size definitions */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+       barrier();
+}
+
+/* Get the instruction length */
+static inline int insn_length(unsigned char code)
+{
+       return ((((int)code + 64) >> 7) + 1) << 1;
+}
+
+#endif
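
insn_length() derives the length from the two top bits of the first opcode byte: 00 selects 2 bytes, 01 and 10 select 4 bytes, and 11 selects 6 bytes. A worked check (assumes <assert.h>):

/* Worked example for insn_length() on representative opcode bytes. */
static inline void example_insn_length_check(void)
{
	assert(insn_length(0x07) == 2);	/* 00xxxxxx -> 2 bytes (e.g. BCR)  */
	assert(insn_length(0x47) == 4);	/* 01xxxxxx -> 4 bytes (e.g. BC)   */
	assert(insn_length(0xb2) == 4);	/* 10xxxxxx -> 4 bytes (e.g. B2xx) */
	assert(insn_length(0xc0) == 6);	/* 11xxxxxx -> 6 bytes (e.g. LARL) */
}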
diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h
new file mode 100644 (file)
index 0000000..160acd4
--- /dev/null
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definition for kernel virtual machines on s390.
+ *
+ * Adapted copy of struct definition kvm_s390_sie_block from
+ * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
+ *
+ * Copyright IBM Corp. 2008, 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ *  Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef SELFTEST_KVM_SIE_H
+#define SELFTEST_KVM_SIE_H
+
+#include <linux/types.h>
+
+struct kvm_s390_sie_block {
+#define CPUSTAT_STOPPED    0x80000000
+#define CPUSTAT_WAIT       0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT     0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF        0x00008000
+#define CPUSTAT_SLSV       0x00004000
+#define CPUSTAT_SLSR       0x00002000
+#define CPUSTAT_ZARCH      0x00000800
+#define CPUSTAT_MCDS       0x00000100
+#define CPUSTAT_KSS        0x00000200
+#define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
+#define CPUSTAT_GED2       0x00000010
+#define CPUSTAT_G          0x00000008
+#define CPUSTAT_GED        0x00000004
+#define CPUSTAT_J          0x00000002
+#define CPUSTAT_P          0x00000001
+       __u32 cpuflags;                 /* 0x0000 */
+       __u32: 1;                       /* 0x0004 */
+       __u32 prefix : 18;
+       __u32: 1;
+       __u32 ibc : 12;
+       __u8    reserved08[4];          /* 0x0008 */
+#define PROG_IN_SIE BIT(0)
+       __u32   prog0c;                 /* 0x000c */
+       union {
+               __u8    reserved10[16]; /* 0x0010 */
+               struct {
+                       __u64   pv_handle_cpu;
+                       __u64   pv_handle_config;
+               };
+       };
+#define PROG_BLOCK_SIE BIT(0)
+#define PROG_REQUEST   BIT(1)
+       __u32   prog20;                 /* 0x0020 */
+       __u8    reserved24[4];          /* 0x0024 */
+       __u64   cputm;                  /* 0x0028 */
+       __u64   ckc;                    /* 0x0030 */
+       __u64   epoch;                  /* 0x0038 */
+       __u32   svcc;                   /* 0x0040 */
+#define LCTL_CR0       0x8000
+#define LCTL_CR6       0x0200
+#define LCTL_CR9       0x0040
+#define LCTL_CR10      0x0020
+#define LCTL_CR11      0x0010
+#define LCTL_CR14      0x0002
+       __u16   lctl;                   /* 0x0044 */
+       __s16   icpua;                  /* 0x0046 */
+#define ICTL_OPEREXC   0x80000000
+#define ICTL_PINT      0x20000000
+#define ICTL_LPSW      0x00400000
+#define ICTL_STCTL     0x00040000
+#define ICTL_ISKE      0x00004000
+#define ICTL_SSKE      0x00002000
+#define ICTL_RRBE      0x00001000
+#define ICTL_TPROT     0x00000200
+       __u32   ictl;                   /* 0x0048 */
+#define ECA_CEI                0x80000000
+#define ECA_IB         0x40000000
+#define ECA_SIGPI      0x10000000
+#define ECA_MVPGI      0x01000000
+#define ECA_AIV                0x00200000
+#define ECA_VX         0x00020000
+#define ECA_PROTEXCI   0x00002000
+#define ECA_APIE       0x00000008
+#define ECA_SII                0x00000001
+       __u32   eca;                    /* 0x004c */
+#define ICPT_INST      0x04
+#define ICPT_PROGI     0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ    0x10
+#define ICPT_EXTINT    0x14
+#define ICPT_IOREQ     0x18
+#define ICPT_WAIT      0x1c
+#define ICPT_VALIDITY  0x20
+#define ICPT_STOP      0x28
+#define ICPT_OPEREXC   0x2C
+#define ICPT_PARTEXEC  0x38
+#define ICPT_IOINST    0x40
+#define ICPT_KSS       0x5c
+#define ICPT_MCHKREQ   0x60
+#define ICPT_INT_ENABLE        0x64
+#define ICPT_PV_INSTR  0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF   0x70
+       __u8    icptcode;               /* 0x0050 */
+       __u8    icptstatus;             /* 0x0051 */
+       __u16   ihcpu;                  /* 0x0052 */
+       __u8    reserved54;             /* 0x0054 */
+#define IICTL_CODE_NONE                 0x00
+#define IICTL_CODE_MCHK                 0x01
+#define IICTL_CODE_EXT          0x02
+#define IICTL_CODE_IO           0x03
+#define IICTL_CODE_RESTART      0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND      0x11
+       __u8    iictl;                  /* 0x0055 */
+       __u16   ipa;                    /* 0x0056 */
+       __u32   ipb;                    /* 0x0058 */
+       __u32   scaoh;                  /* 0x005c */
+#define FPF_BPBC       0x20
+       __u8    fpf;                    /* 0x0060 */
+#define ECB_GS         0x40
+#define ECB_TE         0x10
+#define ECB_SPECI      0x08
+#define ECB_SRSI       0x04
+#define ECB_HOSTPROTINT        0x02
+#define ECB_PTF                0x01
+       __u8    ecb;                    /* 0x0061 */
+#define ECB2_CMMA      0x80
+#define ECB2_IEP       0x20
+#define ECB2_PFMFI     0x08
+#define ECB2_ESCA      0x04
+#define ECB2_ZPCI_LSI  0x02
+       __u8    ecb2;                   /* 0x0062 */
+#define ECB3_AISI      0x20
+#define ECB3_AISII     0x10
+#define ECB3_DEA       0x08
+#define ECB3_AES       0x04
+#define ECB3_RI                0x01
+       __u8    ecb3;                   /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+       __u32   scaol;                  /* 0x0064 */
+       __u8    sdf;                    /* 0x0068 */
+       __u8    epdx;                   /* 0x0069 */
+       __u8    cpnc;                   /* 0x006a */
+       __u8    reserved6b;             /* 0x006b */
+       __u32   todpr;                  /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+       __u32   gd;                     /* 0x0070 */
+       __u8    reserved74[12];         /* 0x0074 */
+       __u64   mso;                    /* 0x0080 */
+       __u64   msl;                    /* 0x0088 */
+       __u64   psw_mask;               /* 0x0090 */
+       __u64   psw_addr;               /* 0x0098 */
+       __u64   gg14;                   /* 0x00a0 */
+       __u64   gg15;                   /* 0x00a8 */
+       __u8    reservedb0[8];          /* 0x00b0 */
+#define HPID_KVM       0x4
+#define HPID_VSIE      0x5
+       __u8    hpid;                   /* 0x00b8 */
+       __u8    reservedb9[7];          /* 0x00b9 */
+       union {
+               struct {
+                       __u32   eiparams;       /* 0x00c0 */
+                       __u16   extcpuaddr;     /* 0x00c4 */
+                       __u16   eic;            /* 0x00c6 */
+               };
+               __u64   mcic;                   /* 0x00c0 */
+       } __packed;
+       __u32   reservedc8;             /* 0x00c8 */
+       union {
+               struct {
+                       __u16   pgmilc;         /* 0x00cc */
+                       __u16   iprcc;          /* 0x00ce */
+               };
+               __u32   edc;                    /* 0x00cc */
+       } __packed;
+       union {
+               struct {
+                       __u32   dxc;            /* 0x00d0 */
+                       __u16   mcn;            /* 0x00d4 */
+                       __u8    perc;           /* 0x00d6 */
+                       __u8    peratmid;       /* 0x00d7 */
+               };
+               __u64   faddr;                  /* 0x00d0 */
+       } __packed;
+       __u64   peraddr;                /* 0x00d8 */
+       __u8    eai;                    /* 0x00e0 */
+       __u8    peraid;                 /* 0x00e1 */
+       __u8    oai;                    /* 0x00e2 */
+       __u8    armid;                  /* 0x00e3 */
+       __u8    reservede4[4];          /* 0x00e4 */
+       union {
+               __u64   tecmc;          /* 0x00e8 */
+               struct {
+                       __u16   subchannel_id;  /* 0x00e8 */
+                       __u16   subchannel_nr;  /* 0x00ea */
+                       __u32   io_int_parm;    /* 0x00ec */
+                       __u32   io_int_word;    /* 0x00f0 */
+               };
+       } __packed;
+       __u8    reservedf4[8];          /* 0x00f4 */
+#define CRYCB_FORMAT_MASK      0x00000003
+#define CRYCB_FORMAT0          0x00000000
+#define CRYCB_FORMAT1          0x00000001
+#define CRYCB_FORMAT2          0x00000003
+       __u32   crycbd;                 /* 0x00fc */
+       __u64   gcr[16];                /* 0x0100 */
+       union {
+               __u64   gbea;           /* 0x0180 */
+               __u64   sidad;
+       };
+       __u8    reserved188[8];         /* 0x0188 */
+       __u64   sdnxo;                  /* 0x0190 */
+       __u8    reserved198[8];         /* 0x0198 */
+       __u32   fac;                    /* 0x01a0 */
+       __u8    reserved1a4[20];        /* 0x01a4 */
+       __u64   cbrlo;                  /* 0x01b8 */
+       __u8    reserved1c0[8];         /* 0x01c0 */
+#define ECD_HOSTREGMGMT        0x20000000
+#define ECD_MEF                0x08000000
+#define ECD_ETOKENF    0x02000000
+#define ECD_ECC                0x00200000
+       __u32   ecd;                    /* 0x01c8 */
+       __u8    reserved1cc[18];        /* 0x01cc */
+       __u64   pp;                     /* 0x01de */
+       __u8    reserved1e6[2];         /* 0x01e6 */
+       __u64   itdba;                  /* 0x01e8 */
+       __u64   riccbd;                 /* 0x01f0 */
+       __u64   gvrd;                   /* 0x01f8 */
+} __packed __aligned(512);
+
+#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
new file mode 100644 (file)
index 0000000..8035a87
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h
deleted file mode 100644 (file)
index 1bf2756..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Definition for kernel virtual machines on s390x
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_DEBUG_PRINT_H
-#define SELFTEST_KVM_DEBUG_PRINT_H
-
-#include "asm/ptrace.h"
-#include "kvm_util.h"
-#include "sie.h"
-
-static inline void print_hex_bytes(const char *name, u64 addr, size_t len)
-{
-       u64 pos;
-
-       pr_debug("%s (%p)\n", name, (void *)addr);
-       pr_debug("            0/0x00---------|");
-       if (len > 8)
-               pr_debug(" 8/0x08---------|");
-       if (len > 16)
-               pr_debug(" 16/0x10--------|");
-       if (len > 24)
-               pr_debug(" 24/0x18--------|");
-       for (pos = 0; pos < len; pos += 8) {
-               if ((pos % 32) == 0)
-                       pr_debug("\n %3lu 0x%.3lx ", pos, pos);
-               pr_debug(" %16lx", *((u64 *)(addr + pos)));
-       }
-       pr_debug("\n");
-}
-
-static inline void print_hex(const char *name, u64 addr)
-{
-       print_hex_bytes(name, addr, 512);
-}
-
-static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
-       pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n",
-                run->flags,
-                run->psw_mask, run->psw_addr,
-                run->exit_reason, exit_reason_str(run->exit_reason));
-       pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n",
-                sie_block->psw_mask, sie_block->psw_addr);
-}
-
-static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block)
-{
-       print_hex_bytes("run", (u64)run, 0x150);
-       print_hex("sie_block", (u64)sie_block);
-       print_psw(run, sie_block);
-}
-
-static inline void print_regs(struct kvm_run *run)
-{
-       struct kvm_sync_regs *sync_regs = &run->s.regs;
-
-       print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS);
-       print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS);
-       print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS);
-}
-
-#endif /* SELFTEST_KVM_DEBUG_PRINT_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
deleted file mode 100644 (file)
index b0ed713..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later
- *
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
-#define SELFTEST_KVM_DIAG318_TEST_HANDLER
-
-uint64_t get_diag318_info(void);
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h
deleted file mode 100644 (file)
index 00a1ced..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * Get the facility bits with the STFLE instruction
- */
-
-#ifndef SELFTEST_KVM_FACILITY_H
-#define SELFTEST_KVM_FACILITY_H
-
-#include <linux/bitops.h>
-
-/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
-#define NB_STFL_DOUBLEWORDS 32
-
-extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-extern bool stfle_flag;
-
-static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
-{
-       return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
-}
-
-static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
-{
-       register unsigned long r0 asm("0") = nb_doublewords - 1;
-
-       asm volatile("  .insn   s,0xb2b00000,0(%1)\n"
-                       : "+d" (r0)
-                       : "a" (fac)
-                       : "memory", "cc");
-}
-
-static inline void setup_facilities(void)
-{
-       stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS);
-       stfle_flag = true;
-}
-
-static inline bool test_facility(int nr)
-{
-       if (!stfle_flag)
-               setup_facilities();
-       return test_bit_inv(nr, stfl_doublewords);
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
deleted file mode 100644 (file)
index e43a57d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-struct kvm_vm_arch {};
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
deleted file mode 100644 (file)
index 33fef6f..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * s390x processor specific defines
- */
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <linux/compiler.h>
-
-/* Bits in the region/segment table entry */
-#define REGION_ENTRY_ORIGIN    ~0xfffUL /* region/segment table origin    */
-#define REGION_ENTRY_PROTECT   0x200    /* region protection bit          */
-#define REGION_ENTRY_NOEXEC    0x100    /* region no-execute bit          */
-#define REGION_ENTRY_OFFSET    0xc0     /* region table offset            */
-#define REGION_ENTRY_INVALID   0x20     /* invalid region table entry     */
-#define REGION_ENTRY_TYPE      0x0c     /* region/segment table type mask */
-#define REGION_ENTRY_LENGTH    0x03     /* region third length            */
-
-/* Bits in the page table entry */
-#define PAGE_INVALID   0x400           /* HW invalid bit    */
-#define PAGE_PROTECT   0x200           /* HW read-only bit  */
-#define PAGE_NOEXEC    0x100           /* HW no-execute bit */
-
-/* Page size definitions */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE BIT_ULL(PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
-
-/* Is there a portable way to do this? */
-static inline void cpu_relax(void)
-{
-       barrier();
-}
-
-/* Get the instruction length */
-static inline int insn_length(unsigned char code)
-{
-       return ((((int)code + 64) >> 7) + 1) << 1;
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h
deleted file mode 100644 (file)
index 160acd4..0000000
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Definition for kernel virtual machines on s390.
- *
- * Adapted copy of struct definition kvm_s390_sie_block from
- * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs.
- *
- * Copyright IBM Corp. 2008, 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- *  Carsten Otte <cotte@de.ibm.com>
- */
-
-#ifndef SELFTEST_KVM_SIE_H
-#define SELFTEST_KVM_SIE_H
-
-#include <linux/types.h>
-
-struct kvm_s390_sie_block {
-#define CPUSTAT_STOPPED    0x80000000
-#define CPUSTAT_WAIT       0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT   0x04000000
-#define CPUSTAT_IO_INT     0x02000000
-#define CPUSTAT_EXT_INT    0x01000000
-#define CPUSTAT_RUNNING    0x00800000
-#define CPUSTAT_RETAINED   0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB    0x00010000
-#define CPUSTAT_RRF        0x00008000
-#define CPUSTAT_SLSV       0x00004000
-#define CPUSTAT_SLSR       0x00002000
-#define CPUSTAT_ZARCH      0x00000800
-#define CPUSTAT_MCDS       0x00000100
-#define CPUSTAT_KSS        0x00000200
-#define CPUSTAT_SM         0x00000080
-#define CPUSTAT_IBS        0x00000040
-#define CPUSTAT_GED2       0x00000010
-#define CPUSTAT_G          0x00000008
-#define CPUSTAT_GED        0x00000004
-#define CPUSTAT_J          0x00000002
-#define CPUSTAT_P          0x00000001
-       __u32 cpuflags;                 /* 0x0000 */
-       __u32: 1;                       /* 0x0004 */
-       __u32 prefix : 18;
-       __u32: 1;
-       __u32 ibc : 12;
-       __u8    reserved08[4];          /* 0x0008 */
-#define PROG_IN_SIE BIT(0)
-       __u32   prog0c;                 /* 0x000c */
-       union {
-               __u8    reserved10[16]; /* 0x0010 */
-               struct {
-                       __u64   pv_handle_cpu;
-                       __u64   pv_handle_config;
-               };
-       };
-#define PROG_BLOCK_SIE BIT(0)
-#define PROG_REQUEST   BIT(1)
-       __u32   prog20;                 /* 0x0020 */
-       __u8    reserved24[4];          /* 0x0024 */
-       __u64   cputm;                  /* 0x0028 */
-       __u64   ckc;                    /* 0x0030 */
-       __u64   epoch;                  /* 0x0038 */
-       __u32   svcc;                   /* 0x0040 */
-#define LCTL_CR0       0x8000
-#define LCTL_CR6       0x0200
-#define LCTL_CR9       0x0040
-#define LCTL_CR10      0x0020
-#define LCTL_CR11      0x0010
-#define LCTL_CR14      0x0002
-       __u16   lctl;                   /* 0x0044 */
-       __s16   icpua;                  /* 0x0046 */
-#define ICTL_OPEREXC   0x80000000
-#define ICTL_PINT      0x20000000
-#define ICTL_LPSW      0x00400000
-#define ICTL_STCTL     0x00040000
-#define ICTL_ISKE      0x00004000
-#define ICTL_SSKE      0x00002000
-#define ICTL_RRBE      0x00001000
-#define ICTL_TPROT     0x00000200
-       __u32   ictl;                   /* 0x0048 */
-#define ECA_CEI                0x80000000
-#define ECA_IB         0x40000000
-#define ECA_SIGPI      0x10000000
-#define ECA_MVPGI      0x01000000
-#define ECA_AIV                0x00200000
-#define ECA_VX         0x00020000
-#define ECA_PROTEXCI   0x00002000
-#define ECA_APIE       0x00000008
-#define ECA_SII                0x00000001
-       __u32   eca;                    /* 0x004c */
-#define ICPT_INST      0x04
-#define ICPT_PROGI     0x08
-#define ICPT_INSTPROGI 0x0C
-#define ICPT_EXTREQ    0x10
-#define ICPT_EXTINT    0x14
-#define ICPT_IOREQ     0x18
-#define ICPT_WAIT      0x1c
-#define ICPT_VALIDITY  0x20
-#define ICPT_STOP      0x28
-#define ICPT_OPEREXC   0x2C
-#define ICPT_PARTEXEC  0x38
-#define ICPT_IOINST    0x40
-#define ICPT_KSS       0x5c
-#define ICPT_MCHKREQ   0x60
-#define ICPT_INT_ENABLE        0x64
-#define ICPT_PV_INSTR  0x68
-#define ICPT_PV_NOTIFY 0x6c
-#define ICPT_PV_PREF   0x70
-       __u8    icptcode;               /* 0x0050 */
-       __u8    icptstatus;             /* 0x0051 */
-       __u16   ihcpu;                  /* 0x0052 */
-       __u8    reserved54;             /* 0x0054 */
-#define IICTL_CODE_NONE                 0x00
-#define IICTL_CODE_MCHK                 0x01
-#define IICTL_CODE_EXT          0x02
-#define IICTL_CODE_IO           0x03
-#define IICTL_CODE_RESTART      0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND      0x11
-       __u8    iictl;                  /* 0x0055 */
-       __u16   ipa;                    /* 0x0056 */
-       __u32   ipb;                    /* 0x0058 */
-       __u32   scaoh;                  /* 0x005c */
-#define FPF_BPBC       0x20
-       __u8    fpf;                    /* 0x0060 */
-#define ECB_GS         0x40
-#define ECB_TE         0x10
-#define ECB_SPECI      0x08
-#define ECB_SRSI       0x04
-#define ECB_HOSTPROTINT        0x02
-#define ECB_PTF                0x01
-       __u8    ecb;                    /* 0x0061 */
-#define ECB2_CMMA      0x80
-#define ECB2_IEP       0x20
-#define ECB2_PFMFI     0x08
-#define ECB2_ESCA      0x04
-#define ECB2_ZPCI_LSI  0x02
-       __u8    ecb2;                   /* 0x0062 */
-#define ECB3_AISI      0x20
-#define ECB3_AISII     0x10
-#define ECB3_DEA       0x08
-#define ECB3_AES       0x04
-#define ECB3_RI                0x01
-       __u8    ecb3;                   /* 0x0063 */
-#define ESCA_SCAOL_MASK ~0x3fU
-       __u32   scaol;                  /* 0x0064 */
-       __u8    sdf;                    /* 0x0068 */
-       __u8    epdx;                   /* 0x0069 */
-       __u8    cpnc;                   /* 0x006a */
-       __u8    reserved6b;             /* 0x006b */
-       __u32   todpr;                  /* 0x006c */
-#define GISA_FORMAT1 0x00000001
-       __u32   gd;                     /* 0x0070 */
-       __u8    reserved74[12];         /* 0x0074 */
-       __u64   mso;                    /* 0x0080 */
-       __u64   msl;                    /* 0x0088 */
-       __u64   psw_mask;               /* 0x0090 */
-       __u64   psw_addr;               /* 0x0098 */
-       __u64   gg14;                   /* 0x00a0 */
-       __u64   gg15;                   /* 0x00a8 */
-       __u8    reservedb0[8];          /* 0x00b0 */
-#define HPID_KVM       0x4
-#define HPID_VSIE      0x5
-       __u8    hpid;                   /* 0x00b8 */
-       __u8    reservedb9[7];          /* 0x00b9 */
-       union {
-               struct {
-                       __u32   eiparams;       /* 0x00c0 */
-                       __u16   extcpuaddr;     /* 0x00c4 */
-                       __u16   eic;            /* 0x00c6 */
-               };
-               __u64   mcic;                   /* 0x00c0 */
-       } __packed;
-       __u32   reservedc8;             /* 0x00c8 */
-       union {
-               struct {
-                       __u16   pgmilc;         /* 0x00cc */
-                       __u16   iprcc;          /* 0x00ce */
-               };
-               __u32   edc;                    /* 0x00cc */
-       } __packed;
-       union {
-               struct {
-                       __u32   dxc;            /* 0x00d0 */
-                       __u16   mcn;            /* 0x00d4 */
-                       __u8    perc;           /* 0x00d6 */
-                       __u8    peratmid;       /* 0x00d7 */
-               };
-               __u64   faddr;                  /* 0x00d0 */
-       } __packed;
-       __u64   peraddr;                /* 0x00d8 */
-       __u8    eai;                    /* 0x00e0 */
-       __u8    peraid;                 /* 0x00e1 */
-       __u8    oai;                    /* 0x00e2 */
-       __u8    armid;                  /* 0x00e3 */
-       __u8    reservede4[4];          /* 0x00e4 */
-       union {
-               __u64   tecmc;          /* 0x00e8 */
-               struct {
-                       __u16   subchannel_id;  /* 0x00e8 */
-                       __u16   subchannel_nr;  /* 0x00ea */
-                       __u32   io_int_parm;    /* 0x00ec */
-                       __u32   io_int_word;    /* 0x00f0 */
-               };
-       } __packed;
-       __u8    reservedf4[8];          /* 0x00f4 */
-#define CRYCB_FORMAT_MASK      0x00000003
-#define CRYCB_FORMAT0          0x00000000
-#define CRYCB_FORMAT1          0x00000001
-#define CRYCB_FORMAT2          0x00000003
-       __u32   crycbd;                 /* 0x00fc */
-       __u64   gcr[16];                /* 0x0100 */
-       union {
-               __u64   gbea;           /* 0x0180 */
-               __u64   sidad;
-       };
-       __u8    reserved188[8];         /* 0x0188 */
-       __u64   sdnxo;                  /* 0x0190 */
-       __u8    reserved198[8];         /* 0x0198 */
-       __u32   fac;                    /* 0x01a0 */
-       __u8    reserved1a4[20];        /* 0x01a4 */
-       __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[8];         /* 0x01c0 */
-#define ECD_HOSTREGMGMT        0x20000000
-#define ECD_MEF                0x08000000
-#define ECD_ETOKENF    0x02000000
-#define ECD_ECC                0x00200000
-       __u32   ecd;                    /* 0x01c8 */
-       __u8    reserved1cc[18];        /* 0x01cc */
-       __u64   pp;                     /* 0x01de */
-       __u8    reserved1e6[2];         /* 0x01e6 */
-       __u64   itdba;                  /* 0x01e8 */
-       __u64   riccbd;                 /* 0x01f0 */
-       __u64   gvrd;                   /* 0x01f8 */
-} __packed __aligned(512);
-
-#endif /* SELFTEST_KVM_SIE_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
deleted file mode 100644 (file)
index 8035a87..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
new file mode 100644 (file)
index 0000000..80fe9f6
--- /dev/null
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+#include "ucall_common.h"
+
+#define APIC_DEFAULT_GPA               0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE              0x0000001b
+#define MSR_IA32_APICBASE_BSP          (1<<8)
+#define MSR_IA32_APICBASE_EXTD         (1<<10)
+#define MSR_IA32_APICBASE_ENABLE       (1<<11)
+#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
+#define                GET_APIC_BASE(x)        (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR  0x800
+#define X2APIC_ENABLE  (1UL << 10)
+#define        APIC_ID         0x20
+#define        APIC_LVR        0x30
+#define                GET_APIC_ID_FIELD(x)    (((x) >> 24) & 0xFF)
+#define        APIC_TASKPRI    0x80
+#define        APIC_PROCPRI    0xA0
+#define        APIC_EOI        0xB0
+#define        APIC_SPIV       0xF0
+#define                APIC_SPIV_FOCUS_DISABLED        (1 << 9)
+#define                APIC_SPIV_APIC_ENABLED          (1 << 8)
+#define APIC_IRR       0x200
+#define        APIC_ICR        0x300
+#define        APIC_LVTCMCI    0x2f0
+#define                APIC_DEST_SELF          0x40000
+#define                APIC_DEST_ALLINC        0x80000
+#define                APIC_DEST_ALLBUT        0xC0000
+#define                APIC_ICR_RR_MASK        0x30000
+#define                APIC_ICR_RR_INVALID     0x00000
+#define                APIC_ICR_RR_INPROG      0x10000
+#define                APIC_ICR_RR_VALID       0x20000
+#define                APIC_INT_LEVELTRIG      0x08000
+#define                APIC_INT_ASSERT         0x04000
+#define                APIC_ICR_BUSY           0x01000
+#define                APIC_DEST_LOGICAL       0x00800
+#define                APIC_DEST_PHYSICAL      0x00000
+#define                APIC_DM_FIXED           0x00000
+#define                APIC_DM_FIXED_MASK      0x00700
+#define                APIC_DM_LOWEST          0x00100
+#define                APIC_DM_SMI             0x00200
+#define                APIC_DM_REMRD           0x00300
+#define                APIC_DM_NMI             0x00400
+#define                APIC_DM_INIT            0x00500
+#define                APIC_DM_STARTUP         0x00600
+#define                APIC_DM_EXTINT          0x00700
+#define                APIC_VECTOR_MASK        0x000FF
+#define        APIC_ICR2       0x310
+#define                SET_APIC_DEST_FIELD(x)  ((x) << 24)
+#define APIC_LVTT      0x320
+#define                APIC_LVT_TIMER_ONESHOT          (0 << 17)
+#define                APIC_LVT_TIMER_PERIODIC         (1 << 17)
+#define                APIC_LVT_TIMER_TSCDEADLINE      (2 << 17)
+#define                APIC_LVT_MASKED                 (1 << 16)
+#define        APIC_TMICT      0x380
+#define        APIC_TMCCT      0x390
+#define        APIC_TDCR       0x3E0
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+       return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+       return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+       ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+       return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+{
+       return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+       uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+       __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
+                      fault, APIC_BASE_MSR + (reg >> 4), value);
+}
+
+static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+{
+       uint8_t fault = x2apic_write_reg_safe(reg, value);
+
+       __GUEST_ASSERT(fault == GP_VECTOR,
+                      "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
+                      APIC_BASE_MSR + (reg >> 4), value, fault);
+}
+
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
new file mode 100644 (file)
index 0000000..5a74bb3
--- /dev/null
@@ -0,0 +1,1276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "hyperv.h"
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define EVMCS_VERSION 1
+
+extern bool enable_evmcs;
+
+struct hv_enlightened_vmcs {
+       u32 revision_id;
+       u32 abort;
+
+       u16 host_es_selector;
+       u16 host_cs_selector;
+       u16 host_ss_selector;
+       u16 host_ds_selector;
+       u16 host_fs_selector;
+       u16 host_gs_selector;
+       u16 host_tr_selector;
+
+       u16 padding16_1;
+
+       u64 host_ia32_pat;
+       u64 host_ia32_efer;
+
+       u64 host_cr0;
+       u64 host_cr3;
+       u64 host_cr4;
+
+       u64 host_ia32_sysenter_esp;
+       u64 host_ia32_sysenter_eip;
+       u64 host_rip;
+       u32 host_ia32_sysenter_cs;
+
+       u32 pin_based_vm_exec_control;
+       u32 vm_exit_controls;
+       u32 secondary_vm_exec_control;
+
+       u64 io_bitmap_a;
+       u64 io_bitmap_b;
+       u64 msr_bitmap;
+
+       u16 guest_es_selector;
+       u16 guest_cs_selector;
+       u16 guest_ss_selector;
+       u16 guest_ds_selector;
+       u16 guest_fs_selector;
+       u16 guest_gs_selector;
+       u16 guest_ldtr_selector;
+       u16 guest_tr_selector;
+
+       u32 guest_es_limit;
+       u32 guest_cs_limit;
+       u32 guest_ss_limit;
+       u32 guest_ds_limit;
+       u32 guest_fs_limit;
+       u32 guest_gs_limit;
+       u32 guest_ldtr_limit;
+       u32 guest_tr_limit;
+       u32 guest_gdtr_limit;
+       u32 guest_idtr_limit;
+
+       u32 guest_es_ar_bytes;
+       u32 guest_cs_ar_bytes;
+       u32 guest_ss_ar_bytes;
+       u32 guest_ds_ar_bytes;
+       u32 guest_fs_ar_bytes;
+       u32 guest_gs_ar_bytes;
+       u32 guest_ldtr_ar_bytes;
+       u32 guest_tr_ar_bytes;
+
+       u64 guest_es_base;
+       u64 guest_cs_base;
+       u64 guest_ss_base;
+       u64 guest_ds_base;
+       u64 guest_fs_base;
+       u64 guest_gs_base;
+       u64 guest_ldtr_base;
+       u64 guest_tr_base;
+       u64 guest_gdtr_base;
+       u64 guest_idtr_base;
+
+       u64 padding64_1[3];
+
+       u64 vm_exit_msr_store_addr;
+       u64 vm_exit_msr_load_addr;
+       u64 vm_entry_msr_load_addr;
+
+       u64 cr3_target_value0;
+       u64 cr3_target_value1;
+       u64 cr3_target_value2;
+       u64 cr3_target_value3;
+
+       u32 page_fault_error_code_mask;
+       u32 page_fault_error_code_match;
+
+       u32 cr3_target_count;
+       u32 vm_exit_msr_store_count;
+       u32 vm_exit_msr_load_count;
+       u32 vm_entry_msr_load_count;
+
+       u64 tsc_offset;
+       u64 virtual_apic_page_addr;
+       u64 vmcs_link_pointer;
+
+       u64 guest_ia32_debugctl;
+       u64 guest_ia32_pat;
+       u64 guest_ia32_efer;
+
+       u64 guest_pdptr0;
+       u64 guest_pdptr1;
+       u64 guest_pdptr2;
+       u64 guest_pdptr3;
+
+       u64 guest_pending_dbg_exceptions;
+       u64 guest_sysenter_esp;
+       u64 guest_sysenter_eip;
+
+       u32 guest_activity_state;
+       u32 guest_sysenter_cs;
+
+       u64 cr0_guest_host_mask;
+       u64 cr4_guest_host_mask;
+       u64 cr0_read_shadow;
+       u64 cr4_read_shadow;
+       u64 guest_cr0;
+       u64 guest_cr3;
+       u64 guest_cr4;
+       u64 guest_dr7;
+
+       u64 host_fs_base;
+       u64 host_gs_base;
+       u64 host_tr_base;
+       u64 host_gdtr_base;
+       u64 host_idtr_base;
+       u64 host_rsp;
+
+       u64 ept_pointer;
+
+       u16 virtual_processor_id;
+       u16 padding16_2[3];
+
+       u64 padding64_2[5];
+       u64 guest_physical_address;
+
+       u32 vm_instruction_error;
+       u32 vm_exit_reason;
+       u32 vm_exit_intr_info;
+       u32 vm_exit_intr_error_code;
+       u32 idt_vectoring_info_field;
+       u32 idt_vectoring_error_code;
+       u32 vm_exit_instruction_len;
+       u32 vmx_instruction_info;
+
+       u64 exit_qualification;
+       u64 exit_io_instruction_ecx;
+       u64 exit_io_instruction_esi;
+       u64 exit_io_instruction_edi;
+       u64 exit_io_instruction_eip;
+
+       u64 guest_linear_address;
+       u64 guest_rsp;
+       u64 guest_rflags;
+
+       u32 guest_interruptibility_info;
+       u32 cpu_based_vm_exec_control;
+       u32 exception_bitmap;
+       u32 vm_entry_controls;
+       u32 vm_entry_intr_info_field;
+       u32 vm_entry_exception_error_code;
+       u32 vm_entry_instruction_len;
+       u32 tpr_threshold;
+
+       u64 guest_rip;
+
+       u32 hv_clean_fields;
+       u32 padding32_1;
+       u32 hv_synthetic_controls;
+       struct {
+               u32 nested_flush_hypercall:1;
+               u32 msr_bitmap:1;
+               u32 reserved:30;
+       }  __packed hv_enlightenments_control;
+       u32 hv_vp_id;
+       u32 padding32_2;
+       u64 hv_vm_id;
+       u64 partition_assist_page;
+       u64 padding64_4[4];
+       u64 guest_bndcfgs;
+       u64 guest_ia32_perf_global_ctrl;
+       u64 guest_ia32_s_cet;
+       u64 guest_ssp;
+       u64 guest_ia32_int_ssp_table_addr;
+       u64 guest_ia32_lbr_ctl;
+       u64 padding64_5[2];
+       u64 xss_exit_bitmap;
+       u64 encls_exiting_bitmap;
+       u64 host_ia32_perf_global_ctrl;
+       u64 tsc_multiplier;
+       u64 host_ia32_s_cet;
+       u64 host_ssp;
+       u64 host_ia32_int_ssp_table_addr;
+       u64 padding64_6;
+} __packed;
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP               BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2             BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1             BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC             BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT            BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY            BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN            BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR                     BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT             BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC              BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1               BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2               BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER             BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1                BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
+
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
+
+extern struct hv_enlightened_vmcs *current_evmcs;
+
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
+
+static inline void evmcs_enable(void)
+{
+       enable_evmcs = true;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+       current_vp_assist->current_nested_vmcs = vmcs_pa;
+       current_vp_assist->enlighten_vmentry = 1;
+
+       current_evmcs = vmcs;
+
+       return 0;
+}
+
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
+{
+       if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
+               return false;
+
+       current_evmcs->revision_id = EVMCS_VERSION;
+
+       return true;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+       *value = current_vp_assist->current_nested_vmcs &
+               ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+       return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+       switch (encoding) {
+       case GUEST_RIP:
+               *value = current_evmcs->guest_rip;
+               break;
+       case GUEST_RSP:
+               *value = current_evmcs->guest_rsp;
+               break;
+       case GUEST_RFLAGS:
+               *value = current_evmcs->guest_rflags;
+               break;
+       case HOST_IA32_PAT:
+               *value = current_evmcs->host_ia32_pat;
+               break;
+       case HOST_IA32_EFER:
+               *value = current_evmcs->host_ia32_efer;
+               break;
+       case HOST_CR0:
+               *value = current_evmcs->host_cr0;
+               break;
+       case HOST_CR3:
+               *value = current_evmcs->host_cr3;
+               break;
+       case HOST_CR4:
+               *value = current_evmcs->host_cr4;
+               break;
+       case HOST_IA32_SYSENTER_ESP:
+               *value = current_evmcs->host_ia32_sysenter_esp;
+               break;
+       case HOST_IA32_SYSENTER_EIP:
+               *value = current_evmcs->host_ia32_sysenter_eip;
+               break;
+       case HOST_RIP:
+               *value = current_evmcs->host_rip;
+               break;
+       case IO_BITMAP_A:
+               *value = current_evmcs->io_bitmap_a;
+               break;
+       case IO_BITMAP_B:
+               *value = current_evmcs->io_bitmap_b;
+               break;
+       case MSR_BITMAP:
+               *value = current_evmcs->msr_bitmap;
+               break;
+       case GUEST_ES_BASE:
+               *value = current_evmcs->guest_es_base;
+               break;
+       case GUEST_CS_BASE:
+               *value = current_evmcs->guest_cs_base;
+               break;
+       case GUEST_SS_BASE:
+               *value = current_evmcs->guest_ss_base;
+               break;
+       case GUEST_DS_BASE:
+               *value = current_evmcs->guest_ds_base;
+               break;
+       case GUEST_FS_BASE:
+               *value = current_evmcs->guest_fs_base;
+               break;
+       case GUEST_GS_BASE:
+               *value = current_evmcs->guest_gs_base;
+               break;
+       case GUEST_LDTR_BASE:
+               *value = current_evmcs->guest_ldtr_base;
+               break;
+       case GUEST_TR_BASE:
+               *value = current_evmcs->guest_tr_base;
+               break;
+       case GUEST_GDTR_BASE:
+               *value = current_evmcs->guest_gdtr_base;
+               break;
+       case GUEST_IDTR_BASE:
+               *value = current_evmcs->guest_idtr_base;
+               break;
+       case TSC_OFFSET:
+               *value = current_evmcs->tsc_offset;
+               break;
+       case VIRTUAL_APIC_PAGE_ADDR:
+               *value = current_evmcs->virtual_apic_page_addr;
+               break;
+       case VMCS_LINK_POINTER:
+               *value = current_evmcs->vmcs_link_pointer;
+               break;
+       case GUEST_IA32_DEBUGCTL:
+               *value = current_evmcs->guest_ia32_debugctl;
+               break;
+       case GUEST_IA32_PAT:
+               *value = current_evmcs->guest_ia32_pat;
+               break;
+       case GUEST_IA32_EFER:
+               *value = current_evmcs->guest_ia32_efer;
+               break;
+       case GUEST_PDPTR0:
+               *value = current_evmcs->guest_pdptr0;
+               break;
+       case GUEST_PDPTR1:
+               *value = current_evmcs->guest_pdptr1;
+               break;
+       case GUEST_PDPTR2:
+               *value = current_evmcs->guest_pdptr2;
+               break;
+       case GUEST_PDPTR3:
+               *value = current_evmcs->guest_pdptr3;
+               break;
+       case GUEST_PENDING_DBG_EXCEPTIONS:
+               *value = current_evmcs->guest_pending_dbg_exceptions;
+               break;
+       case GUEST_SYSENTER_ESP:
+               *value = current_evmcs->guest_sysenter_esp;
+               break;
+       case GUEST_SYSENTER_EIP:
+               *value = current_evmcs->guest_sysenter_eip;
+               break;
+       case CR0_GUEST_HOST_MASK:
+               *value = current_evmcs->cr0_guest_host_mask;
+               break;
+       case CR4_GUEST_HOST_MASK:
+               *value = current_evmcs->cr4_guest_host_mask;
+               break;
+       case CR0_READ_SHADOW:
+               *value = current_evmcs->cr0_read_shadow;
+               break;
+       case CR4_READ_SHADOW:
+               *value = current_evmcs->cr4_read_shadow;
+               break;
+       case GUEST_CR0:
+               *value = current_evmcs->guest_cr0;
+               break;
+       case GUEST_CR3:
+               *value = current_evmcs->guest_cr3;
+               break;
+       case GUEST_CR4:
+               *value = current_evmcs->guest_cr4;
+               break;
+       case GUEST_DR7:
+               *value = current_evmcs->guest_dr7;
+               break;
+       case HOST_FS_BASE:
+               *value = current_evmcs->host_fs_base;
+               break;
+       case HOST_GS_BASE:
+               *value = current_evmcs->host_gs_base;
+               break;
+       case HOST_TR_BASE:
+               *value = current_evmcs->host_tr_base;
+               break;
+       case HOST_GDTR_BASE:
+               *value = current_evmcs->host_gdtr_base;
+               break;
+       case HOST_IDTR_BASE:
+               *value = current_evmcs->host_idtr_base;
+               break;
+       case HOST_RSP:
+               *value = current_evmcs->host_rsp;
+               break;
+       case EPT_POINTER:
+               *value = current_evmcs->ept_pointer;
+               break;
+       case GUEST_BNDCFGS:
+               *value = current_evmcs->guest_bndcfgs;
+               break;
+       case XSS_EXIT_BITMAP:
+               *value = current_evmcs->xss_exit_bitmap;
+               break;
+       case GUEST_PHYSICAL_ADDRESS:
+               *value = current_evmcs->guest_physical_address;
+               break;
+       case EXIT_QUALIFICATION:
+               *value = current_evmcs->exit_qualification;
+               break;
+       case GUEST_LINEAR_ADDRESS:
+               *value = current_evmcs->guest_linear_address;
+               break;
+       case VM_EXIT_MSR_STORE_ADDR:
+               *value = current_evmcs->vm_exit_msr_store_addr;
+               break;
+       case VM_EXIT_MSR_LOAD_ADDR:
+               *value = current_evmcs->vm_exit_msr_load_addr;
+               break;
+       case VM_ENTRY_MSR_LOAD_ADDR:
+               *value = current_evmcs->vm_entry_msr_load_addr;
+               break;
+       case CR3_TARGET_VALUE0:
+               *value = current_evmcs->cr3_target_value0;
+               break;
+       case CR3_TARGET_VALUE1:
+               *value = current_evmcs->cr3_target_value1;
+               break;
+       case CR3_TARGET_VALUE2:
+               *value = current_evmcs->cr3_target_value2;
+               break;
+       case CR3_TARGET_VALUE3:
+               *value = current_evmcs->cr3_target_value3;
+               break;
+       case TPR_THRESHOLD:
+               *value = current_evmcs->tpr_threshold;
+               break;
+       case GUEST_INTERRUPTIBILITY_INFO:
+               *value = current_evmcs->guest_interruptibility_info;
+               break;
+       case CPU_BASED_VM_EXEC_CONTROL:
+               *value = current_evmcs->cpu_based_vm_exec_control;
+               break;
+       case EXCEPTION_BITMAP:
+               *value = current_evmcs->exception_bitmap;
+               break;
+       case VM_ENTRY_CONTROLS:
+               *value = current_evmcs->vm_entry_controls;
+               break;
+       case VM_ENTRY_INTR_INFO_FIELD:
+               *value = current_evmcs->vm_entry_intr_info_field;
+               break;
+       case VM_ENTRY_EXCEPTION_ERROR_CODE:
+               *value = current_evmcs->vm_entry_exception_error_code;
+               break;
+       case VM_ENTRY_INSTRUCTION_LEN:
+               *value = current_evmcs->vm_entry_instruction_len;
+               break;
+       case HOST_IA32_SYSENTER_CS:
+               *value = current_evmcs->host_ia32_sysenter_cs;
+               break;
+       case PIN_BASED_VM_EXEC_CONTROL:
+               *value = current_evmcs->pin_based_vm_exec_control;
+               break;
+       case VM_EXIT_CONTROLS:
+               *value = current_evmcs->vm_exit_controls;
+               break;
+       case SECONDARY_VM_EXEC_CONTROL:
+               *value = current_evmcs->secondary_vm_exec_control;
+               break;
+       case GUEST_ES_LIMIT:
+               *value = current_evmcs->guest_es_limit;
+               break;
+       case GUEST_CS_LIMIT:
+               *value = current_evmcs->guest_cs_limit;
+               break;
+       case GUEST_SS_LIMIT:
+               *value = current_evmcs->guest_ss_limit;
+               break;
+       case GUEST_DS_LIMIT:
+               *value = current_evmcs->guest_ds_limit;
+               break;
+       case GUEST_FS_LIMIT:
+               *value = current_evmcs->guest_fs_limit;
+               break;
+       case GUEST_GS_LIMIT:
+               *value = current_evmcs->guest_gs_limit;
+               break;
+       case GUEST_LDTR_LIMIT:
+               *value = current_evmcs->guest_ldtr_limit;
+               break;
+       case GUEST_TR_LIMIT:
+               *value = current_evmcs->guest_tr_limit;
+               break;
+       case GUEST_GDTR_LIMIT:
+               *value = current_evmcs->guest_gdtr_limit;
+               break;
+       case GUEST_IDTR_LIMIT:
+               *value = current_evmcs->guest_idtr_limit;
+               break;
+       case GUEST_ES_AR_BYTES:
+               *value = current_evmcs->guest_es_ar_bytes;
+               break;
+       case GUEST_CS_AR_BYTES:
+               *value = current_evmcs->guest_cs_ar_bytes;
+               break;
+       case GUEST_SS_AR_BYTES:
+               *value = current_evmcs->guest_ss_ar_bytes;
+               break;
+       case GUEST_DS_AR_BYTES:
+               *value = current_evmcs->guest_ds_ar_bytes;
+               break;
+       case GUEST_FS_AR_BYTES:
+               *value = current_evmcs->guest_fs_ar_bytes;
+               break;
+       case GUEST_GS_AR_BYTES:
+               *value = current_evmcs->guest_gs_ar_bytes;
+               break;
+       case GUEST_LDTR_AR_BYTES:
+               *value = current_evmcs->guest_ldtr_ar_bytes;
+               break;
+       case GUEST_TR_AR_BYTES:
+               *value = current_evmcs->guest_tr_ar_bytes;
+               break;
+       case GUEST_ACTIVITY_STATE:
+               *value = current_evmcs->guest_activity_state;
+               break;
+       case GUEST_SYSENTER_CS:
+               *value = current_evmcs->guest_sysenter_cs;
+               break;
+       case VM_INSTRUCTION_ERROR:
+               *value = current_evmcs->vm_instruction_error;
+               break;
+       case VM_EXIT_REASON:
+               *value = current_evmcs->vm_exit_reason;
+               break;
+       case VM_EXIT_INTR_INFO:
+               *value = current_evmcs->vm_exit_intr_info;
+               break;
+       case VM_EXIT_INTR_ERROR_CODE:
+               *value = current_evmcs->vm_exit_intr_error_code;
+               break;
+       case IDT_VECTORING_INFO_FIELD:
+               *value = current_evmcs->idt_vectoring_info_field;
+               break;
+       case IDT_VECTORING_ERROR_CODE:
+               *value = current_evmcs->idt_vectoring_error_code;
+               break;
+       case VM_EXIT_INSTRUCTION_LEN:
+               *value = current_evmcs->vm_exit_instruction_len;
+               break;
+       case VMX_INSTRUCTION_INFO:
+               *value = current_evmcs->vmx_instruction_info;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MASK:
+               *value = current_evmcs->page_fault_error_code_mask;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MATCH:
+               *value = current_evmcs->page_fault_error_code_match;
+               break;
+       case CR3_TARGET_COUNT:
+               *value = current_evmcs->cr3_target_count;
+               break;
+       case VM_EXIT_MSR_STORE_COUNT:
+               *value = current_evmcs->vm_exit_msr_store_count;
+               break;
+       case VM_EXIT_MSR_LOAD_COUNT:
+               *value = current_evmcs->vm_exit_msr_load_count;
+               break;
+       case VM_ENTRY_MSR_LOAD_COUNT:
+               *value = current_evmcs->vm_entry_msr_load_count;
+               break;
+       case HOST_ES_SELECTOR:
+               *value = current_evmcs->host_es_selector;
+               break;
+       case HOST_CS_SELECTOR:
+               *value = current_evmcs->host_cs_selector;
+               break;
+       case HOST_SS_SELECTOR:
+               *value = current_evmcs->host_ss_selector;
+               break;
+       case HOST_DS_SELECTOR:
+               *value = current_evmcs->host_ds_selector;
+               break;
+       case HOST_FS_SELECTOR:
+               *value = current_evmcs->host_fs_selector;
+               break;
+       case HOST_GS_SELECTOR:
+               *value = current_evmcs->host_gs_selector;
+               break;
+       case HOST_TR_SELECTOR:
+               *value = current_evmcs->host_tr_selector;
+               break;
+       case GUEST_ES_SELECTOR:
+               *value = current_evmcs->guest_es_selector;
+               break;
+       case GUEST_CS_SELECTOR:
+               *value = current_evmcs->guest_cs_selector;
+               break;
+       case GUEST_SS_SELECTOR:
+               *value = current_evmcs->guest_ss_selector;
+               break;
+       case GUEST_DS_SELECTOR:
+               *value = current_evmcs->guest_ds_selector;
+               break;
+       case GUEST_FS_SELECTOR:
+               *value = current_evmcs->guest_fs_selector;
+               break;
+       case GUEST_GS_SELECTOR:
+               *value = current_evmcs->guest_gs_selector;
+               break;
+       case GUEST_LDTR_SELECTOR:
+               *value = current_evmcs->guest_ldtr_selector;
+               break;
+       case GUEST_TR_SELECTOR:
+               *value = current_evmcs->guest_tr_selector;
+               break;
+       case VIRTUAL_PROCESSOR_ID:
+               *value = current_evmcs->virtual_processor_id;
+               break;
+       case HOST_IA32_PERF_GLOBAL_CTRL:
+               *value = current_evmcs->host_ia32_perf_global_ctrl;
+               break;
+       case GUEST_IA32_PERF_GLOBAL_CTRL:
+               *value = current_evmcs->guest_ia32_perf_global_ctrl;
+               break;
+       case ENCLS_EXITING_BITMAP:
+               *value = current_evmcs->encls_exiting_bitmap;
+               break;
+       case TSC_MULTIPLIER:
+               *value = current_evmcs->tsc_multiplier;
+               break;
+       default: return 1;
+       }
+
+       return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+       switch (encoding) {
+       case GUEST_RIP:
+               current_evmcs->guest_rip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_RSP:
+               current_evmcs->guest_rsp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case GUEST_RFLAGS:
+               current_evmcs->guest_rflags = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case HOST_IA32_PAT:
+               current_evmcs->host_ia32_pat = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_EFER:
+               current_evmcs->host_ia32_efer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR0:
+               current_evmcs->host_cr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR3:
+               current_evmcs->host_cr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CR4:
+               current_evmcs->host_cr4 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_SYSENTER_ESP:
+               current_evmcs->host_ia32_sysenter_esp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_IA32_SYSENTER_EIP:
+               current_evmcs->host_ia32_sysenter_eip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_RIP:
+               current_evmcs->host_rip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case IO_BITMAP_A:
+               current_evmcs->io_bitmap_a = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+               break;
+       case IO_BITMAP_B:
+               current_evmcs->io_bitmap_b = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
+               break;
+       case MSR_BITMAP:
+               current_evmcs->msr_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+               break;
+       case GUEST_ES_BASE:
+               current_evmcs->guest_es_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_BASE:
+               current_evmcs->guest_cs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_BASE:
+               current_evmcs->guest_ss_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_BASE:
+               current_evmcs->guest_ds_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_BASE:
+               current_evmcs->guest_fs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_BASE:
+               current_evmcs->guest_gs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_BASE:
+               current_evmcs->guest_ldtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_BASE:
+               current_evmcs->guest_tr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GDTR_BASE:
+               current_evmcs->guest_gdtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_IDTR_BASE:
+               current_evmcs->guest_idtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case TSC_OFFSET:
+               current_evmcs->tsc_offset = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case VIRTUAL_APIC_PAGE_ADDR:
+               current_evmcs->virtual_apic_page_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case VMCS_LINK_POINTER:
+               current_evmcs->vmcs_link_pointer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_DEBUGCTL:
+               current_evmcs->guest_ia32_debugctl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_PAT:
+               current_evmcs->guest_ia32_pat = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_IA32_EFER:
+               current_evmcs->guest_ia32_efer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR0:
+               current_evmcs->guest_pdptr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR1:
+               current_evmcs->guest_pdptr1 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR2:
+               current_evmcs->guest_pdptr2 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PDPTR3:
+               current_evmcs->guest_pdptr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_PENDING_DBG_EXCEPTIONS:
+               current_evmcs->guest_pending_dbg_exceptions = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_ESP:
+               current_evmcs->guest_sysenter_esp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_EIP:
+               current_evmcs->guest_sysenter_eip = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case CR0_GUEST_HOST_MASK:
+               current_evmcs->cr0_guest_host_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR4_GUEST_HOST_MASK:
+               current_evmcs->cr4_guest_host_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR0_READ_SHADOW:
+               current_evmcs->cr0_read_shadow = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case CR4_READ_SHADOW:
+               current_evmcs->cr4_read_shadow = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR0:
+               current_evmcs->guest_cr0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR3:
+               current_evmcs->guest_cr3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_CR4:
+               current_evmcs->guest_cr4 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case GUEST_DR7:
+               current_evmcs->guest_dr7 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
+               break;
+       case HOST_FS_BASE:
+               current_evmcs->host_fs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_GS_BASE:
+               current_evmcs->host_gs_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_TR_BASE:
+               current_evmcs->host_tr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_GDTR_BASE:
+               current_evmcs->host_gdtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_IDTR_BASE:
+               current_evmcs->host_idtr_base = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case HOST_RSP:
+               current_evmcs->host_rsp = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+               break;
+       case EPT_POINTER:
+               current_evmcs->ept_pointer = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+               break;
+       case GUEST_BNDCFGS:
+               current_evmcs->guest_bndcfgs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case XSS_EXIT_BITMAP:
+               current_evmcs->xss_exit_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case GUEST_PHYSICAL_ADDRESS:
+               current_evmcs->guest_physical_address = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case EXIT_QUALIFICATION:
+               current_evmcs->exit_qualification = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_LINEAR_ADDRESS:
+               current_evmcs->guest_linear_address = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_MSR_STORE_ADDR:
+               current_evmcs->vm_exit_msr_store_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_LOAD_ADDR:
+               current_evmcs->vm_exit_msr_load_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_ENTRY_MSR_LOAD_ADDR:
+               current_evmcs->vm_entry_msr_load_addr = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE0:
+               current_evmcs->cr3_target_value0 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE1:
+               current_evmcs->cr3_target_value1 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE2:
+               current_evmcs->cr3_target_value2 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_VALUE3:
+               current_evmcs->cr3_target_value3 = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case TPR_THRESHOLD:
+               current_evmcs->tpr_threshold = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case GUEST_INTERRUPTIBILITY_INFO:
+               current_evmcs->guest_interruptibility_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
+               break;
+       case CPU_BASED_VM_EXEC_CONTROL:
+               current_evmcs->cpu_based_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
+               break;
+       case EXCEPTION_BITMAP:
+               current_evmcs->exception_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
+               break;
+       case VM_ENTRY_CONTROLS:
+               current_evmcs->vm_entry_controls = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
+               break;
+       case VM_ENTRY_INTR_INFO_FIELD:
+               current_evmcs->vm_entry_intr_info_field = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case VM_ENTRY_EXCEPTION_ERROR_CODE:
+               current_evmcs->vm_entry_exception_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case VM_ENTRY_INSTRUCTION_LEN:
+               current_evmcs->vm_entry_instruction_len = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
+               break;
+       case HOST_IA32_SYSENTER_CS:
+               current_evmcs->host_ia32_sysenter_cs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case PIN_BASED_VM_EXEC_CONTROL:
+               current_evmcs->pin_based_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case VM_EXIT_CONTROLS:
+               current_evmcs->vm_exit_controls = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case SECONDARY_VM_EXEC_CONTROL:
+               current_evmcs->secondary_vm_exec_control = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
+               break;
+       case GUEST_ES_LIMIT:
+               current_evmcs->guest_es_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_LIMIT:
+               current_evmcs->guest_cs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_LIMIT:
+               current_evmcs->guest_ss_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_LIMIT:
+               current_evmcs->guest_ds_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_LIMIT:
+               current_evmcs->guest_fs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_LIMIT:
+               current_evmcs->guest_gs_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_LIMIT:
+               current_evmcs->guest_ldtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_LIMIT:
+               current_evmcs->guest_tr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GDTR_LIMIT:
+               current_evmcs->guest_gdtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_IDTR_LIMIT:
+               current_evmcs->guest_idtr_limit = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_ES_AR_BYTES:
+               current_evmcs->guest_es_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_AR_BYTES:
+               current_evmcs->guest_cs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_AR_BYTES:
+               current_evmcs->guest_ss_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_AR_BYTES:
+               current_evmcs->guest_ds_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_AR_BYTES:
+               current_evmcs->guest_fs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_AR_BYTES:
+               current_evmcs->guest_gs_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_AR_BYTES:
+               current_evmcs->guest_ldtr_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_AR_BYTES:
+               current_evmcs->guest_tr_ar_bytes = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_ACTIVITY_STATE:
+               current_evmcs->guest_activity_state = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case GUEST_SYSENTER_CS:
+               current_evmcs->guest_sysenter_cs = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case VM_INSTRUCTION_ERROR:
+               current_evmcs->vm_instruction_error = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_REASON:
+               current_evmcs->vm_exit_reason = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INTR_INFO:
+               current_evmcs->vm_exit_intr_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INTR_ERROR_CODE:
+               current_evmcs->vm_exit_intr_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case IDT_VECTORING_INFO_FIELD:
+               current_evmcs->idt_vectoring_info_field = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case IDT_VECTORING_ERROR_CODE:
+               current_evmcs->idt_vectoring_error_code = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VM_EXIT_INSTRUCTION_LEN:
+               current_evmcs->vm_exit_instruction_len = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case VMX_INSTRUCTION_INFO:
+               current_evmcs->vmx_instruction_info = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MASK:
+               current_evmcs->page_fault_error_code_mask = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case PAGE_FAULT_ERROR_CODE_MATCH:
+               current_evmcs->page_fault_error_code_match = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case CR3_TARGET_COUNT:
+               current_evmcs->cr3_target_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_STORE_COUNT:
+               current_evmcs->vm_exit_msr_store_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_EXIT_MSR_LOAD_COUNT:
+               current_evmcs->vm_exit_msr_load_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case VM_ENTRY_MSR_LOAD_COUNT:
+               current_evmcs->vm_entry_msr_load_count = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+               break;
+       case HOST_ES_SELECTOR:
+               current_evmcs->host_es_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_CS_SELECTOR:
+               current_evmcs->host_cs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_SS_SELECTOR:
+               current_evmcs->host_ss_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_DS_SELECTOR:
+               current_evmcs->host_ds_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_FS_SELECTOR:
+               current_evmcs->host_fs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_GS_SELECTOR:
+               current_evmcs->host_gs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case HOST_TR_SELECTOR:
+               current_evmcs->host_tr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case GUEST_ES_SELECTOR:
+               current_evmcs->guest_es_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_CS_SELECTOR:
+               current_evmcs->guest_cs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_SS_SELECTOR:
+               current_evmcs->guest_ss_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_DS_SELECTOR:
+               current_evmcs->guest_ds_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_FS_SELECTOR:
+               current_evmcs->guest_fs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_GS_SELECTOR:
+               current_evmcs->guest_gs_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_LDTR_SELECTOR:
+               current_evmcs->guest_ldtr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case GUEST_TR_SELECTOR:
+               current_evmcs->guest_tr_selector = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
+               break;
+       case VIRTUAL_PROCESSOR_ID:
+               current_evmcs->virtual_processor_id = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+               break;
+       case HOST_IA32_PERF_GLOBAL_CTRL:
+               current_evmcs->host_ia32_perf_global_ctrl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+               break;
+       case GUEST_IA32_PERF_GLOBAL_CTRL:
+               current_evmcs->guest_ia32_perf_global_ctrl = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+               break;
+       case ENCLS_EXITING_BITMAP:
+               current_evmcs->encls_exiting_bitmap = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       case TSC_MULTIPLIER:
+               current_evmcs->tsc_multiplier = value;
+               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+               break;
+       default: return 1;
+       }
+
+       return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+       int ret;
+
+       current_evmcs->hv_clean_fields = 0;
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "mov %%rsp, (%[host_rsp]);"
+                            "lea 1f(%%rip), %%rax;"
+                            "mov %%rax, (%[host_rip]);"
+                            "vmlaunch;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"
+                              ((uint64_t)&current_evmcs->host_rsp),
+                              [host_rip]"r"
+                              ((uint64_t)&current_evmcs->host_rip)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+       int ret;
+
+       /* HOST_RIP */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+       /* HOST_RSP */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "mov %%rsp, (%[host_rsp]);"
+                            "lea 1f(%%rip), %%rax;"
+                            "mov %%rax, (%[host_rip]);"
+                            "vmresume;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"
+                              ((uint64_t)&current_evmcs->host_rsp),
+                              [host_rip]"r"
+                              ((uint64_t)&current_evmcs->host_rip)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
new file mode 100644 (file)
index 0000000..f13e532
--- /dev/null
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#include "processor.h"
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS  0x40000000
+#define HYPERV_CPUID_INTERFACE                 0x40000001
+#define HYPERV_CPUID_VERSION                   0x40000002
+#define HYPERV_CPUID_FEATURES                  0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO          0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS          0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES   0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES           0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS   0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE                  0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES      0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
+#define HV_X64_MSR_HYPERCALL                   0x40000001
+#define HV_X64_MSR_VP_INDEX                    0x40000002
+#define HV_X64_MSR_RESET                       0x40000003
+#define HV_X64_MSR_VP_RUNTIME                  0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT              0x40000020
+#define HV_X64_MSR_REFERENCE_TSC               0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY               0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY              0x40000023
+#define HV_X64_MSR_EOI                         0x40000070
+#define HV_X64_MSR_ICR                         0x40000071
+#define HV_X64_MSR_TPR                         0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
+#define HV_X64_MSR_SCONTROL                    0x40000080
+#define HV_X64_MSR_SVERSION                    0x40000081
+#define HV_X64_MSR_SIEFP                       0x40000082
+#define HV_X64_MSR_SIMP                                0x40000083
+#define HV_X64_MSR_EOM                         0x40000084
+#define HV_X64_MSR_SINT0                       0x40000090
+#define HV_X64_MSR_SINT1                       0x40000091
+#define HV_X64_MSR_SINT2                       0x40000092
+#define HV_X64_MSR_SINT3                       0x40000093
+#define HV_X64_MSR_SINT4                       0x40000094
+#define HV_X64_MSR_SINT5                       0x40000095
+#define HV_X64_MSR_SINT6                       0x40000096
+#define HV_X64_MSR_SINT7                       0x40000097
+#define HV_X64_MSR_SINT8                       0x40000098
+#define HV_X64_MSR_SINT9                       0x40000099
+#define HV_X64_MSR_SINT10                      0x4000009A
+#define HV_X64_MSR_SINT11                      0x4000009B
+#define HV_X64_MSR_SINT12                      0x4000009C
+#define HV_X64_MSR_SINT13                      0x4000009D
+#define HV_X64_MSR_SINT14                      0x4000009E
+#define HV_X64_MSR_SINT15                      0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG              0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT               0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG              0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT               0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG              0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT               0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT               0x400000B7
+#define HV_X64_MSR_GUEST_IDLE                  0x400000F0
+#define HV_X64_MSR_CRASH_P0                    0x40000100
+#define HV_X64_MSR_CRASH_P1                    0x40000101
+#define HV_X64_MSR_CRASH_P2                    0x40000102
+#define HV_X64_MSR_CRASH_P3                    0x40000103
+#define HV_X64_MSR_CRASH_P4                    0x40000104
+#define HV_X64_MSR_CRASH_CTL                   0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS                0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL       0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL              0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS               0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER          0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER          0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER       0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS              0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
+#define HV_MSR_SYNIC_AVAILABLE                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
+#define HV_MSR_SYNTIMER_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
+#define HV_MSR_HYPERCALL_AVAILABLE             \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
+#define HV_MSR_VP_INDEX_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
+#define HV_MSR_RESET_AVAILABLE                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
+#define HV_MSR_STAT_PAGES_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
+#define HV_ACCESS_FREQUENCY_MSRS               \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
+#define HV_ACCESS_REENLIGHTENMENT              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
+#define HV_ACCESS_TSC_INVARIANT                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS                   \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
+#define HV_ACCESS_PARTITION_ID                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
+#define HV_ACCESS_MEMORY_POOL                  \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
+#define HV_ADJUST_MESSAGE_BUFFERS              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
+#define HV_POST_MESSAGES                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
+#define HV_SIGNAL_EVENTS                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
+#define HV_CREATE_PORT                         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
+#define HV_CONNECT_PORT                                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
+#define HV_ACCESS_STATS                                \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
+#define HV_DEBUGGING                           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
+#define HV_CPU_MANAGEMENT                      \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
+#define HV_ISOLATION                           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE                         \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE               \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
+#define HV_X64_PERF_MONITOR_AVAILABLE                  \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED                   \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED             \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED                        \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED                 \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EAX */
+#define HV_X64_NESTED_DIRECT_FLUSH                     \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH              \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
+#define HV_X64_NESTED_MSR_BITMAP                       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
+
+/* HYPERV_CPUID_NESTED_FEATURES.EBX */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL          \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING       \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE     0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST      0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT           0x0008
+#define HVCALL_SEND_IPI                                0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
+#define HVCALL_SEND_IPI_EX                     0x0015
+#define HVCALL_GET_PARTITION_ID                        0x0046
+#define HVCALL_DEPOSIT_MEMORY                  0x0048
+#define HVCALL_CREATE_VP                       0x004e
+#define HVCALL_GET_VP_REGISTERS                        0x0050
+#define HVCALL_SET_VP_REGISTERS                        0x0051
+#define HVCALL_POST_MESSAGE                    0x005c
+#define HVCALL_SIGNAL_EVENT                    0x005d
+#define HVCALL_POST_DEBUG_DATA                 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA             0x006a
+#define HVCALL_RESET_DEBUG_SESSION             0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR           0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT            0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT          0x007d
+#define HVCALL_RETARGET_INTERRUPT              0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES         0x8001
+
+#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS                      0
+#define HV_STATUS_INVALID_HYPERCALL_CODE       2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT      3
+#define HV_STATUS_INVALID_ALIGNMENT            4
+#define HV_STATUS_INVALID_PARAMETER            5
+#define HV_STATUS_ACCESS_DENIED                        6
+#define HV_STATUS_OPERATION_DENIED             8
+#define HV_STATUS_INSUFFICIENT_MEMORY          11
+#define HV_STATUS_INVALID_PORT_ID              17
+#define HV_STATUS_INVALID_CONNECTION_ID                18
+#define HV_STATUS_INSUFFICIENT_BUFFERS         19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT          BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET    17
+#define HV_HYPERCALL_REP_COMP_OFFSET   32
+
+/*
+ * Issue a Hyper-V hypercall.  Returns the exception vector raised, or 0 if
+ * none; 'hv_status' is set to the hypercall status (if no exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+                                        vm_vaddr_t output_address,
+                                        uint64_t *hv_status)
+{
+       uint64_t error_code;
+       uint8_t vector;
+
+       /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+       asm volatile("mov %[output_address], %%r8\n\t"
+                    KVM_ASM_SAFE("vmcall")
+                    : "=a" (*hv_status),
+                      "+c" (control), "+d" (input_address),
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+                    : [output_address] "r"(output_address),
+                      "a" (-EFAULT)
+                    : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+       return vector;
+}
+
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+                                   vm_vaddr_t output_address)
+{
+       uint64_t hv_status;
+       uint8_t vector;
+
+       vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+       GUEST_ASSERT(!vector);
+       GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
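
[Editor's note: a minimal, hypothetical guest-side sketch of how these helpers are meant to be used; the hypercall code and the pre-filled input page are illustrative only and are not part of this change.]

        static void guest_flush_address_space(vm_vaddr_t input_gpa)
        {
                uint64_t hv_status;
                uint8_t vector;

                /* Slow-path hypercall: input passed via a guest page, no output page. */
                vector = __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                            input_gpa, 0, &hv_status);
                GUEST_ASSERT(!vector);
                GUEST_ASSERT((hv_status & 0xffff) == HV_STATUS_SUCCESS);

                /* Or let the asserting wrapper do the same checks: */
                hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, input_gpa, 0);
        }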
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+       int i;
+
+       for (i = 0; i < n_sse_regs; i++)
+               write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
+#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE       0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT        12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+               (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+       struct {
+               __u32 directhypercall:1;
+               __u32 reserved:31;
+       } features;
+       struct {
+               __u32 reserved;
+       } hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+       __u32 apic_assist;
+       __u32 reserved1;
+       __u64 vtl_control[3];
+       struct hv_nested_enlightenments_control nested_control;
+       __u8 enlighten_vmentry;
+       __u8 reserved2[7];
+       __u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
+struct hyperv_test_pages {
+       /* VP assist page */
+       void *vp_assist_hva;
+       uint64_t vp_assist_gpa;
+       void *vp_assist;
+
+       /* Partition assist page */
+       void *partition_assist_hva;
+       uint64_t partition_assist_gpa;
+       void *partition_assist;
+
+       /* Enlightened VMCS */
+       void *enlightened_vmcs_hva;
+       uint64_t enlightened_vmcs_gpa;
+       void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+                                                      vm_vaddr_t *p_hv_pages_gva);
+
+/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
+#define HV_INVARIANT_TSC_EXPOSED               BIT_ULL(0)
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..972bb1c
--- /dev/null
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
+struct kvm_vm_arch {
+       vm_vaddr_t gdt;
+       vm_vaddr_t tss;
+       vm_vaddr_t idt;
+
+       uint64_t c_bit;
+       uint64_t s_bit;
+       int sev_fd;
+       bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+       return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+       __vm_arch_has_protected_memory(&(vm)->arch)
+
+#define vcpu_arch_put_guest(mem, __val)                                                        \
+do {                                                                                   \
+       const typeof(mem) val = (__val);                                                \
+                                                                                       \
+       if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) {            \
+               (mem) = val;                                                            \
+       } else if (guest_random_bool(&guest_rng)) {                                     \
+               __asm__ __volatile__(KVM_FEP "mov %1, %0"                               \
+                                    : "+m" (mem)                                       \
+                                    : "r" (val) : "memory");                           \
+       } else {                                                                        \
+               uint64_t __old = READ_ONCE(mem);                                        \
+                                                                                       \
+               __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]"       \
+                                    : [ptr] "+m" (mem), [old] "+a" (__old)             \
+                                    : [new]"r" (val) : "memory", "cc");                \
+       }                                                                               \
+} while (0)
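
[Editor's note: a hypothetical usage sketch, not part of the patch. Guest code funnels interesting stores through this macro so that a run with forced emulation enabled randomly exercises the plain-store, emulated-mov, and emulated-cmpxchg paths.]

        static void guest_write_slot(uint64_t *slot, uint64_t val)
        {
                /* Randomly lands as a plain store, a KVM_FEP mov, or a KVM_FEP cmpxchg. */
                vcpu_arch_put_guest(*slot, val);
        }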
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h
new file mode 100644 (file)
index 0000000..295f2d5
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P              BIT_ULL(8)   /* MCG_CTL register available */
+#define MCG_SER_P              BIT_ULL(24)  /* MCA recovery/new status bits */
+#define MCG_LMCE_P             BIT_ULL(27)  /* Local machine check supported */
+#define MCG_CMCI_P             BIT_ULL(10)  /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff       /* Bit 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63)   /* valid error */
+#define MCI_STATUS_UC (1ULL << 61)    /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60)    /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2    /* physical address */
+#define MCI_CTL2_CMCI_EN               BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
new file mode 100644 (file)
index 0000000..3c10c4d
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
+                                   ((eventsel) & 0xff) |               \
+                                   ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
+#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
+#define INTEL_RDPMC_FAST                       BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL                       BIT_ULL(0)
+#define FIXED_PMC_USER                         BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
+#define FIXED_PMC_NR_BITS                      4
+#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
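
[Editor's note: an illustrative sketch of how the fixed-counter controls compose; wrmsr() and the MSR_CORE_PERF_* indices are assumed to come from the selftests' processor definitions, not from this header.]

        static inline void enable_fixed_counter0(void)
        {
                /* Count in rings 0 and 3 on fixed counter 0... */
                wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL,
                      FIXED_PMC_CTRL(0, FIXED_PMC_KERNEL | FIXED_PMC_USER));
                /* ...and flip its enable bit in the global control MSR. */
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(0));
        }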
+
+#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
+#define PMU_CAP_LBR_FMT                                0x3f
+
+#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
+#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
+#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
+#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
+#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
+#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
+#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
+#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
+
+#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
+#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
+#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
+#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
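
[Editor's note: to make the RAW_EVENT() encoding concrete, a hypothetical sketch of programming general purpose counter 0 with one of the events above; wrmsr() and MSR_P6_EVNTSEL0 are assumed from the selftests' processor definitions rather than from this header.]

        static inline void count_insns_retired_on_gp0(void)
        {
                uint64_t eventsel = INTEL_ARCH_INSTRUCTIONS_RETIRED |
                                    ARCH_PERFMON_EVENTSEL_OS |
                                    ARCH_PERFMON_EVENTSEL_USR |
                                    ARCH_PERFMON_EVENTSEL_ENABLE;

                /* RAW_EVENT() output ORs cleanly with the EVENTSEL control bits. */
                wrmsr(MSR_P6_EVNTSEL0, eventsel);
        }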
+
+/*
+ * Note!  The order, and thus the index, of the architectural events matters,
+ * as support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+       INTEL_ARCH_CPU_CYCLES_INDEX,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+       INTEL_ARCH_LLC_REFERENCES_INDEX,
+       INTEL_ARCH_LLC_MISSES_INDEX,
+       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+       NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+       AMD_ZEN_CORE_CYCLES_INDEX,
+       AMD_ZEN_INSTRUCTIONS_INDEX,
+       AMD_ZEN_BRANCHES_INDEX,
+       AMD_ZEN_BRANCH_MISSES_INDEX,
+       NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
new file mode 100644 (file)
index 0000000..9ec984c
--- /dev/null
@@ -0,0 +1,1395 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+
+#include <asm/msr-index.h>
+#include <asm/prctl.h>
+
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+extern bool host_cpu_is_intel;
+extern bool host_cpu_is_amd;
+extern uint64_t guest_tsc_khz;
+
+#ifndef MAX_NR_CPUID_ENTRIES
+#define MAX_NR_CPUID_ENTRIES 100
+#endif
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
+#define NMI_VECTOR             0x02
+
+#define X86_EFLAGS_FIXED        (1u << 1)
+
+#define X86_CR4_VME            (1ul << 0)
+#define X86_CR4_PVI            (1ul << 1)
+#define X86_CR4_TSD            (1ul << 2)
+#define X86_CR4_DE             (1ul << 3)
+#define X86_CR4_PSE            (1ul << 4)
+#define X86_CR4_PAE            (1ul << 5)
+#define X86_CR4_MCE            (1ul << 6)
+#define X86_CR4_PGE            (1ul << 7)
+#define X86_CR4_PCE            (1ul << 8)
+#define X86_CR4_OSFXSR         (1ul << 9)
+#define X86_CR4_OSXMMEXCPT     (1ul << 10)
+#define X86_CR4_UMIP           (1ul << 11)
+#define X86_CR4_LA57           (1ul << 12)
+#define X86_CR4_VMXE           (1ul << 13)
+#define X86_CR4_SMXE           (1ul << 14)
+#define X86_CR4_FSGSBASE       (1ul << 16)
+#define X86_CR4_PCIDE          (1ul << 17)
+#define X86_CR4_OSXSAVE                (1ul << 18)
+#define X86_CR4_SMEP           (1ul << 20)
+#define X86_CR4_SMAP           (1ul << 21)
+#define X86_CR4_PKE            (1ul << 22)
+
+struct xstate_header {
+       u64                             xstate_bv;
+       u64                             xcomp_bv;
+       u64                             reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+       u8                              i387[512];
+       struct xstate_header            header;
+       u8                              extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP               BIT_ULL(0)
+#define XFEATURE_MASK_SSE              BIT_ULL(1)
+#define XFEATURE_MASK_YMM              BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS          BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR           BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK           BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256                BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM         BIT_ULL(7)
+#define XFEATURE_MASK_PT               BIT_ULL(8)
+#define XFEATURE_MASK_PKRU             BIT_ULL(9)
+#define XFEATURE_MASK_PASID            BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER         BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL       BIT_ULL(12)
+#define XFEATURE_MASK_LBR              BIT_ULL(15)
+#define XFEATURE_MASK_XTILE_CFG                BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA       BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK | \
+                                        XFEATURE_MASK_ZMM_Hi256 | \
+                                        XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE            (XFEATURE_MASK_XTILE_DATA | \
+                                        XFEATURE_MASK_XTILE_CFG)
+
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
+enum cpuid_output_regs {
+       KVM_CPUID_EAX,
+       KVM_CPUID_EBX,
+       KVM_CPUID_ECX,
+       KVM_CPUID_EDX
+};
+
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+       u32     function;
+       u16     index;
+       u8      reg;
+       u8      bit;
+};
+#define        KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)                                \
+({                                                                             \
+       struct kvm_x86_cpu_feature feature = {                                  \
+               .function = fn,                                                 \
+               .index = idx,                                                   \
+               .reg = KVM_CPUID_##gpr,                                         \
+               .bit = __bit,                                                   \
+       };                                                                      \
+                                                                               \
+       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
+                         (fn & 0xc0000000) == 0x40000000 ||                    \
+                         (fn & 0xc0000000) == 0x80000000 ||                    \
+                         (fn & 0xc0000000) == 0xc0000000);                     \
+       kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));    \
+       feature;                                                                \
+})
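
[Editor's note: because a feature is just a packed value, it can be built ad hoc and passed around cheaply. A hypothetical sketch follows; this_cpu_has() is assumed to be declared elsewhere in this header.]

        static inline bool guest_has_xsave(void)
        {
                /* Equivalent to the X86_FEATURE_XSAVE definition below. */
                struct kvm_x86_cpu_feature xsave = KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26);

                return this_cpu_has(xsave);
        }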
+
+/*
+ * Basic Leafs, a.k.a. Intel defined
+ */
+#define        X86_FEATURE_MWAIT               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define        X86_FEATURE_VMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define        X86_FEATURE_SMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define        X86_FEATURE_PDCM                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define        X86_FEATURE_PCID                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define        X86_FEATURE_MOVBE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define        X86_FEATURE_TSC_DEADLINE_TIMER  KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define        X86_FEATURE_XSAVE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define        X86_FEATURE_OSXSAVE             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define        X86_FEATURE_RDRAND              KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define        X86_FEATURE_HYPERVISOR          KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
+#define X86_FEATURE_PAE                        KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
+#define        X86_FEATURE_MCE                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define        X86_FEATURE_APIC                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define        X86_FEATURE_CLFLUSH             KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define        X86_FEATURE_XMM                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define        X86_FEATURE_XMM2                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define        X86_FEATURE_FSGSBASE            KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define        X86_FEATURE_TSC_ADJUST          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define        X86_FEATURE_SGX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
+#define        X86_FEATURE_HLE                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define        X86_FEATURE_SMEP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define        X86_FEATURE_INVPCID             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define        X86_FEATURE_RTM                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define        X86_FEATURE_MPX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define        X86_FEATURE_SMAP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define        X86_FEATURE_PCOMMIT             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define        X86_FEATURE_CLFLUSHOPT          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define        X86_FEATURE_CLWB                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define        X86_FEATURE_UMIP                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define        X86_FEATURE_PKU                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define        X86_FEATURE_OSPKE               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
+#define        X86_FEATURE_LA57                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define        X86_FEATURE_RDPID               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define        X86_FEATURE_SGX_LC              KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
+#define        X86_FEATURE_SHSTK               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define        X86_FEATURE_IBT                 KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define        X86_FEATURE_AMX_TILE            KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define        X86_FEATURE_SPEC_CTRL           KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define        X86_FEATURE_ARCH_CAPABILITIES   KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define        X86_FEATURE_PKS                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define        X86_FEATURE_XTILECFG            KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define        X86_FEATURE_XTILEDATA           KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define        X86_FEATURE_XSAVES              KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define        X86_FEATURE_XFD                 KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD      KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
+
+/*
+ * Extended Leafs, a.k.a. AMD defined
+ */
+#define        X86_FEATURE_SVM                 KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define        X86_FEATURE_NX                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define        X86_FEATURE_GBPAGES             KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define        X86_FEATURE_RDTSCP              KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define        X86_FEATURE_LM                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define        X86_FEATURE_INVTSC              KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
+#define        X86_FEATURE_RDPRU               KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define        X86_FEATURE_AMD_IBPB            KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define        X86_FEATURE_NPT                 KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define        X86_FEATURE_LBRV                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define        X86_FEATURE_NRIPS               KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define        X86_FEATURE_VGIF                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_SEV                        KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES             KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME     KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
+
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature.  Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+       u32     function;
+       u8      index;
+       u8      reg;
+       u8      lo_bit;
+       u8      hi_bit;
+};
+#define        KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)                   \
+({                                                                             \
+       struct kvm_x86_cpu_property property = {                                \
+               .function = fn,                                                 \
+               .index = idx,                                                   \
+               .reg = KVM_CPUID_##gpr,                                         \
+               .lo_bit = low_bit,                                              \
+               .hi_bit = high_bit,                                             \
+       };                                                                      \
+                                                                               \
+       kvm_static_assert(low_bit < high_bit);                                  \
+       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
+                         (fn & 0xc0000000) == 0x40000000 ||                    \
+                         (fn & 0xc0000000) == 0x80000000 ||                    \
+                         (fn & 0xc0000000) == 0xc0000000);                     \
+       kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));   \
+       property;                                                               \
+})
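
[Editor's note: for illustration, extracting such a property from a raw CPUID register value amounts to the sketch below; the function is hypothetical, and the selftests' this_cpu_property()-style helpers do effectively this.]

        static inline uint32_t cpuid_property_value(struct kvm_x86_cpu_property prop,
                                                    uint32_t reg_val)
        {
                /* Isolate bits [hi_bit:lo_bit] of the chosen output register. */
                uint64_t mask = BIT_ULL(prop.hi_bit - prop.lo_bit + 1) - 1;

                return (reg_val >> prop.lo_bit) & mask;
        }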
+
+#define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
+
+#define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE           KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI         KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)
+
+#define X86_PROPERTY_XSTATE_TILE_SIZE          KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET                KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES    KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES      KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE                KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW         KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS          KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS              KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)
+
+#define X86_PROPERTY_MAX_KVM_LEAF              KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR                KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
+#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+/*
+ * Intel's architectural PMU events are bizarre.  They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features.  A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set.  Fixed counters also use bizarre enumeration, but inverted
+ * from arch events for general purpose counters.  Fixed counters are supported
+ * if a feature flag is set **OR** the total number of fixed counters is
+ * greater than the index of the counter.
+ *
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+       struct kvm_x86_cpu_feature f;
+};
+#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {                          \
+               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
+       };                                                              \
+                                                                       \
+       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
+                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
+       feature;                                                        \
+})
+
+#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+       unsigned int x86;
+
+       x86 = (eax >> 8) & 0xf;
+
+       if (x86 == 0xf)
+               x86 += (eax >> 20) & 0xff;
+
+       return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+       return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK        BIT_ULL(0)
+#define PTE_WRITABLE_MASK       BIT_ULL(1)
+#define PTE_USER_MASK           BIT_ULL(2)
+#define PTE_ACCESSED_MASK       BIT_ULL(5)
+#define PTE_DIRTY_MASK          BIT_ULL(6)
+#define PTE_LARGE_MASK          BIT_ULL(7)
+#define PTE_GLOBAL_MASK         BIT_ULL(8)
+#define PTE_NX_MASK             BIT_ULL(63)
+
+#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
+
+#define PAGE_SHIFT             12
+#define PAGE_SIZE              (1ULL << PAGE_SHIFT)
+#define PAGE_MASK              (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x)      (PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x)       (1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x)       (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
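+/*
+ * For reference: each page table level adds 9 bits of translation, e.g.
+ * HUGEPAGE_SIZE(2) == 2MiB (1ULL << 21) and HUGEPAGE_SIZE(3) == 1GiB
+ * (1ULL << 30).
+ */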
+
+#define PTE_GET_PA(pte)                ((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
+
+/* General Registers in 64-Bit Mode */
+struct gpr64_regs {
+       u64 rax;
+       u64 rcx;
+       u64 rdx;
+       u64 rbx;
+       u64 rsp;
+       u64 rbp;
+       u64 rsi;
+       u64 rdi;
+       u64 r8;
+       u64 r9;
+       u64 r10;
+       u64 r11;
+       u64 r12;
+       u64 r13;
+       u64 r14;
+       u64 r15;
+};
+
+struct desc64 {
+       uint16_t limit0;
+       uint16_t base0;
+       unsigned base1:8, type:4, s:1, dpl:2, p:1;
+       unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+       uint32_t base3;
+       uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+       uint16_t size;
+       uint64_t address;
+} __attribute__((packed));
+
+struct kvm_x86_state {
+       struct kvm_xsave *xsave;
+       struct kvm_vcpu_events events;
+       struct kvm_mp_state mp_state;
+       struct kvm_regs regs;
+       struct kvm_xcrs xcrs;
+       struct kvm_sregs sregs;
+       struct kvm_debugregs debugregs;
+       union {
+               struct kvm_nested_state nested;
+               char nested_[16384];
+       };
+       struct kvm_msrs msrs;
+};
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+       return ((uint64_t)desc->base3 << 32) |
+               (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+       uint32_t eax, edx;
+       uint64_t tsc_val;
+       /*
+        * The lfence is to wait (on Intel CPUs) until all previous
+        * instructions have been executed. If software requires RDTSC to be
+        * executed prior to execution of any subsequent instruction, it can
+        * execute LFENCE immediately after RDTSC
+        */
+       __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+       tsc_val = ((uint64_t)edx) << 32 | eax;
+       return tsc_val;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+       uint32_t eax, edx;
+
+       __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+       return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+       uint16_t tmp;
+
+       __asm__ __volatile__("in %%dx, %%ax"
+               : /* output */ "=a" (tmp)
+               : /* input */ "d" (port));
+
+       return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+       uint16_t es;
+
+       __asm__ __volatile__("mov %%es, %[es]"
+                            : /* output */ [es]"=rm"(es));
+       return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+       uint16_t cs;
+
+       __asm__ __volatile__("mov %%cs, %[cs]"
+                            : /* output */ [cs]"=rm"(cs));
+       return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+       uint16_t ss;
+
+       __asm__ __volatile__("mov %%ss, %[ss]"
+                            : /* output */ [ss]"=rm"(ss));
+       return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+       uint16_t ds;
+
+       __asm__ __volatile__("mov %%ds, %[ds]"
+                            : /* output */ [ds]"=rm"(ds));
+       return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+       uint16_t fs;
+
+       __asm__ __volatile__("mov %%fs, %[fs]"
+                            : /* output */ [fs]"=rm"(fs));
+       return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+       uint16_t gs;
+
+       __asm__ __volatile__("mov %%gs, %[gs]"
+                            : /* output */ [gs]"=rm"(gs));
+       return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+       uint16_t tr;
+
+       __asm__ __volatile__("str %[tr]"
+                            : /* output */ [tr]"=rm"(tr));
+       return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+       uint64_t cr0;
+
+       __asm__ __volatile__("mov %%cr0, %[cr0]"
+                            : /* output */ [cr0]"=r"(cr0));
+       return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+       uint64_t cr3;
+
+       __asm__ __volatile__("mov %%cr3, %[cr3]"
+                            : /* output */ [cr3]"=r"(cr3));
+       return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+       uint64_t cr4;
+
+       __asm__ __volatile__("mov %%cr4, %[cr4]"
+                            : /* output */ [cr4]"=r"(cr4));
+       return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+       __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline u64 xgetbv(u32 index)
+{
+       u32 eax, edx;
+
+       __asm__ __volatile__("xgetbv;"
+                    : "=a" (eax), "=d" (edx)
+                    : "c" (index));
+       return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+       u32 eax = value;
+       u32 edx = value >> 32;
+
+       __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void wrpkru(u32 pkru)
+{
+       /* Note, ECX and EDX are architecturally required to be '0'. */
+       asm volatile(".byte 0x0f,0x01,0xef\n\t"
+                    : : "a" (pkru), "c"(0), "d"(0));
+}
+
+static inline struct desc_ptr get_gdt(void)
+{
+       struct desc_ptr gdt;
+       __asm__ __volatile__("sgdt %[gdt]"
+                            : /* output */ [gdt]"=m"(gdt));
+       return gdt;
+}
+
+static inline struct desc_ptr get_idt(void)
+{
+       struct desc_ptr idt;
+       __asm__ __volatile__("sidt %[idt]"
+                            : /* output */ [idt]"=m"(idt));
+       return idt;
+}
+
+static inline void outl(uint16_t port, uint32_t value)
+{
+       __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+static inline void __cpuid(uint32_t function, uint32_t index,
+                          uint32_t *eax, uint32_t *ebx,
+                          uint32_t *ecx, uint32_t *edx)
+{
+       *eax = function;
+       *ecx = index;
+
+       asm volatile("cpuid"
+           : "=a" (*eax),
+             "=b" (*ebx),
+             "=c" (*ecx),
+             "=d" (*edx)
+           : "0" (*eax), "2" (*ecx)
+           : "memory");
+}
+
+static inline void cpuid(uint32_t function,
+                        uint32_t *eax, uint32_t *ebx,
+                        uint32_t *ecx, uint32_t *edx)
+{
+       return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline uint32_t this_cpu_fms(void)
+{
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+       return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+       return x86_model(this_cpu_fms());
+}
+
+static inline bool this_cpu_vendor_string_is(const char *vendor)
+{
+       const uint32_t *chunk = (const uint32_t *)vendor;
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(0, &eax, &ebx, &ecx, &edx);
+       return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+static inline bool this_cpu_is_intel(void)
+{
+       return this_cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+static inline bool this_cpu_is_amd(void)
+{
+       return this_cpu_vendor_string_is("AuthenticAMD");
+}
+
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+                                     uint8_t reg, uint8_t lo, uint8_t hi)
+{
+       uint32_t gprs[4];
+
+       __cpuid(function, index,
+               &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+               &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+       return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       return __this_cpu_has(feature.function, feature.index,
+                             feature.reg, feature.bit, feature.bit);
+}
+
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+       return __this_cpu_has(property.function, property.index,
+                             property.reg, property.lo_bit, property.hi_bit);
+}
+
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+       uint32_t max_leaf;
+
+       switch (property.function & 0xc0000000) {
+       case 0:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+               break;
+       case 0x40000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+               break;
+       case 0x80000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+               break;
+       case 0xc0000000:
+               max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+       }
+       return max_leaf >= property.function;
+}
+
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+       uint32_t nr_bits;
+
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+       }
+
+       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
+}
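+
+/*
+ * Illustrative guest-side usage of this_pmu_has() (a sketch, not from the
+ * original header): skip a test case if the branch-instructions-retired
+ * architectural event isn't supported.
+ *
+ *      if (!this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED))
+ *              return;
+ */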
+
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+       if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+               return 0;
+
+       return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+              ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+typedef u32            __attribute__((vector_size(16))) sse128_t;
+#define __sse128_u     union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x)   ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x)   ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+
+static inline void read_sse_reg(int reg, sse128_t *data)
+{
+       switch (reg) {
+       case 0:
+               asm("movdqa %%xmm0, %0" : "=m"(*data));
+               break;
+       case 1:
+               asm("movdqa %%xmm1, %0" : "=m"(*data));
+               break;
+       case 2:
+               asm("movdqa %%xmm2, %0" : "=m"(*data));
+               break;
+       case 3:
+               asm("movdqa %%xmm3, %0" : "=m"(*data));
+               break;
+       case 4:
+               asm("movdqa %%xmm4, %0" : "=m"(*data));
+               break;
+       case 5:
+               asm("movdqa %%xmm5, %0" : "=m"(*data));
+               break;
+       case 6:
+               asm("movdqa %%xmm6, %0" : "=m"(*data));
+               break;
+       case 7:
+               asm("movdqa %%xmm7, %0" : "=m"(*data));
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void write_sse_reg(int reg, const sse128_t *data)
+{
+       switch (reg) {
+       case 0:
+               asm("movdqa %0, %%xmm0" : : "m"(*data));
+               break;
+       case 1:
+               asm("movdqa %0, %%xmm1" : : "m"(*data));
+               break;
+       case 2:
+               asm("movdqa %0, %%xmm2" : : "m"(*data));
+               break;
+       case 3:
+               asm("movdqa %0, %%xmm3" : : "m"(*data));
+               break;
+       case 4:
+               asm("movdqa %0, %%xmm4" : : "m"(*data));
+               break;
+       case 5:
+               asm("movdqa %0, %%xmm5" : : "m"(*data));
+               break;
+       case 6:
+               asm("movdqa %0, %%xmm6" : : "m"(*data));
+               break;
+       case 7:
+               asm("movdqa %0, %%xmm7" : : "m"(*data));
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline void cpu_relax(void)
+{
+       asm volatile("rep; nop" ::: "memory");
+}
+
+static inline void udelay(unsigned long usec)
+{
+       uint64_t start, now, cycles;
+
+       GUEST_ASSERT(guest_tsc_khz);
+       cycles = guest_tsc_khz / 1000 * usec;
+
+       /*
+        * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+        * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+        */
+       start = rdtsc();
+       do {
+               now = rdtsc();
+       } while (now - start < cycles);
+}
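+
+/*
+ * For reference: the delay is converted to TSC cycles, e.g. with a 2 GHz
+ * guest TSC (guest_tsc_khz == 2000000), udelay(10) busy-waits for roughly
+ * 20000 cycles.
+ */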
+
+#define ud2()                  \
+       __asm__ __volatile__(   \
+               "ud2\n" \
+               )
+
+#define hlt()                  \
+       __asm__ __volatile__(   \
+               "hlt\n" \
+               )
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
+
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+                                struct kvm_msrs *msrs)
+{
+       int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+       TEST_ASSERT(r == msrs->nmsrs,
+                   "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+       int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+       TEST_ASSERT(r == msrs->nmsrs,
+                   "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
+                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+                                     struct kvm_debugregs *debugregs)
+{
+       vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+                                     struct kvm_debugregs *debugregs)
+{
+       vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+                                 struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+                                  struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+                                 struct kvm_xsave *xsave)
+{
+       vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+                                struct kvm_xcrs *xcrs)
+{
+       vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+       vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+                                              uint32_t function, uint32_t index);
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+static inline uint32_t kvm_cpu_fms(void)
+{
+       return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+       return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+       return x86_model(kvm_cpu_fms());
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+                  struct kvm_x86_cpu_feature feature);
+
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+                           struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+       return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+       uint32_t max_leaf;
+
+       switch (property.function & 0xc0000000) {
+       case 0:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+               break;
+       case 0x40000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+               break;
+       case 0x80000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+               break;
+       case 0xc0000000:
+               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+       }
+       return max_leaf >= property.function;
+}
+
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+       uint32_t nr_bits;
+
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+       }
+
+       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+       if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+               return 0;
+
+       return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+              ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+       return sizeof(struct kvm_cpuid2) +
+              sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
+ * entries sized to hold @nr_entries.  The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+       struct kvm_cpuid2 *cpuid;
+
+       cpuid = malloc(kvm_cpuid2_size(nr_entries));
+       TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+       cpuid->nent = nr_entries;
+
+       return cpuid;
+}
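+
+/*
+ * Illustrative usage of allocate_kvm_cpuid2() (a sketch, not from the
+ * original header):
+ *
+ *      struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(nent);
+ *
+ *      ... fill cpuid->entries[0..nent-1] ...
+ *      vcpu_init_cpuid(vcpu, cpuid);
+ *      free(cpuid);
+ */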
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+                                                             uint32_t function,
+                                                             uint32_t index)
+{
+       return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+                                                         function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+                                                           uint32_t function)
+{
+       return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+       int r;
+
+       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+       r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+       if (r)
+               return r;
+
+       /* On success, refresh the cache to pick up adjustments made by KVM. */
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+       return 0;
+}
+
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+       vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+
+       /* Refresh the cache to pick up adjustments made by KVM. */
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+                                 struct kvm_x86_cpu_feature feature)
+{
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+       return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                    struct kvm_x86_cpu_feature feature,
+                                    bool set);
+
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+                                         struct kvm_x86_cpu_feature feature)
+{
+       vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+}
+
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                           struct kvm_x86_cpu_feature feature)
+{
+       vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+/*
+ * Assert on an MSR access (or accesses) and pretty print the MSR name when
+ * possible.  Note, the caller provides the stringified name so that the name
+ * of the macro is printed, not the value the macro resolves to (due to macro
+ * expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)                          \
+do {                                                                           \
+       if (__builtin_constant_p(msr)) {                                        \
+               TEST_ASSERT(cond, fmt, str, args);                              \
+       } else if (!(cond)) {                                                   \
+               char buf[16];                                                   \
+                                                                               \
+               snprintf(buf, sizeof(buf), "MSR 0x%x", msr);                    \
+               TEST_ASSERT(cond, fmt, buf, args);                              \
+       }                                                                       \
+} while (0)
+
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+       return msr != MSR_IA32_TSC;
+}
+
+#define vcpu_set_msr(vcpu, msr, val)                                                   \
+do {                                                                                   \
+       uint64_t r, v = val;                                                            \
+                                                                                       \
+       TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,                               \
+                       "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);      \
+       if (!is_durable_msr(msr))                                                       \
+               break;                                                                  \
+       r = vcpu_get_msr(vcpu, msr);                                                    \
+       TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
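+
+/*
+ * Illustrative usage (a sketch, not from the original header), where
+ * "msr_index" and "msr_value" are placeholders for a test's actual values:
+ *
+ *      vcpu_set_msr(vcpu, msr_index, msr_value);
+ *
+ * For a durable MSR the macro also reads the value back and asserts that it
+ * matches; for a non-durable MSR, e.g. MSR_IA32_TSC, only the write itself is
+ * asserted on.
+ */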
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+struct ex_regs {
+       uint64_t rax, rcx, rdx, rbx;
+       uint64_t rbp, rsi, rdi;
+       uint64_t r8, r9, r10, r11;
+       uint64_t r12, r13, r14, r15;
+       uint64_t vector;
+       uint64_t error_code;
+       uint64_t rip;
+       uint64_t cs;
+       uint64_t rflags;
+};
+
+struct idt_entry {
+       uint16_t offset0;
+       uint16_t selector;
+       uint16_t ist : 3;
+       uint16_t : 5;
+       uint16_t type : 4;
+       uint16_t : 1;
+       uint16_t dpl : 2;
+       uint16_t p : 1;
+       uint16_t offset1;
+       uint32_t offset2;
+       uint32_t reserved;
+};
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                       void (*handler)(struct ex_regs *));
+
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
+
+/*
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data.  Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * recursive faults when accessing memory in the handler.  The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction.  But, selftests are 64-bit only, making register pressure a
+ * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9  = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9  = exception vector (non-zero)
+ * r10 = error code
+ */
+#define __KVM_ASM_SAFE(insn, fep)                              \
+       "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
+       "lea 1f(%%rip), %%r10\n\t"                              \
+       "lea 2f(%%rip), %%r11\n\t"                              \
+       fep "1: " insn "\n\t"                                   \
+       "xor %%r9, %%r9\n\t"                                    \
+       "2:\n\t"                                                \
+       "mov  %%r9b, %[vector]\n\t"                             \
+       "mov  %%r10, %[error_code]\n\t"
+
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
+#define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
+#define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...)                                  \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...)                   \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_fep(insn, inputs...)                              \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE(insn)                                 \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
+
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
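+
+/*
+ * Illustrative guest-side usage of kvm_asm_safe() (a sketch, not from the
+ * original header): execute WRMSR against an MSR that is expected to fault,
+ * and assert that the reported vector is #GP (13).  "msr" is a placeholder.
+ *
+ *      uint8_t vector = kvm_asm_safe("wrmsr", "a"(0), "d"(0), "c"(msr));
+ *
+ *      GUEST_ASSERT(vector == 13);
+ */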
+
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
+{                                                                      \
+       uint64_t error_code;                                            \
+       uint8_t vector;                                                 \
+       uint32_t a, d;                                                  \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
+                    : "=a"(a), "=d"(d),                                \
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : "c"(idx)                                         \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+                                                                       \
+       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
+       return vector;                                                  \
+}
+
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as an input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
+       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
+       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                    \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+       return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+       u32 eax = value;
+       u32 edx = value >> 32;
+
+       return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
+
+bool kvm_is_tdp_enabled(void);
+
+static inline bool kvm_is_pmu_enabled(void)
+{
+       return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+       return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+                                   int *level);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+
+static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
+                                                    uint64_t size, uint64_t flags)
+{
+       return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
+}
+
+static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
+                                              uint64_t flags)
+{
+       uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+
+       GUEST_ASSERT(!ret);
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+
+#define vm_xsave_require_permission(xfeature)  \
+       __vm_xsave_require_permission(xfeature, #xfeature)
+
+enum pg_level {
+       PG_LEVEL_NONE,
+       PG_LEVEL_4K,
+       PG_LEVEL_2M,
+       PG_LEVEL_1G,
+       PG_LEVEL_512G,
+       PG_LEVEL_NUM
+};
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                   uint64_t nr_bytes, int level);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM          (1UL<<2) /* Emulation */
+#define X86_CR0_TS          (1UL<<3) /* Task Switched */
+#define X86_CR0_ET          (1UL<<4) /* Extension Type */
+#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP          (1UL<<16) /* Write Protect */
+#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG          (1UL<<31) /* Paging */
+
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK     BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK       BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK                BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK                BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK       BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK          BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK         BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK  BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS  BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
+bool sys_clocksource_is_based_on_tsc(void);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
new file mode 100644 (file)
index 0000000..82c11c8
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+       SEV_GUEST_STATE_UNINITIALIZED = 0,
+       SEV_GUEST_STATE_LAUNCH_UPDATE,
+       SEV_GUEST_STATE_LAUNCH_SECRET,
+       SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG      (1UL << 0)
+#define SEV_POLICY_ES          (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ      0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+                                          struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct.  The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg)                                   \
+({                                                                     \
+       int r;                                                          \
+                                                                       \
+       union {                                                         \
+               struct kvm_sev_cmd c;                                   \
+               unsigned long raw;                                      \
+       } sev_cmd = { .c = {                                            \
+               .id = (cmd),                                            \
+               .data = (uint64_t)(arg),                                \
+               .sev_fd = (vm)->arch.sev_fd,                            \
+       } };                                                            \
+                                                                       \
+       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
+       r ?: sev_cmd.c.error;                                           \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg)                                     \
+({                                                                     \
+       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
+                                                                       \
+       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
+})
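+
+/*
+ * Illustrative usage (a sketch, not from the original header): retrieve the
+ * launch measurement via the overlay above.  "measurement" and "len" are
+ * caller-provided placeholders.
+ *
+ *      struct kvm_sev_launch_measure msmt = {
+ *              .uaddr = (uint64_t)measurement,
+ *              .len = len,
+ *      };
+ *
+ *      vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &msmt);
+ */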
+
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+                                                struct userspace_mem_region *region)
+{
+       struct kvm_enc_region range = {
+               .addr = region->region.userspace_addr,
+               .size = region->region.memory_size,
+       };
+
+       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+                                         uint64_t size)
+{
+       struct kvm_sev_launch_update_data update_data = {
+               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+               .len = size,
+       };
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
new file mode 100644 (file)
index 0000000..29cffd0
--- /dev/null
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SELFTEST_KVM_SVM_H
+#define SELFTEST_KVM_SVM_H
+
+enum {
+       INTERCEPT_INTR,
+       INTERCEPT_NMI,
+       INTERCEPT_SMI,
+       INTERCEPT_INIT,
+       INTERCEPT_VINTR,
+       INTERCEPT_SELECTIVE_CR0,
+       INTERCEPT_STORE_IDTR,
+       INTERCEPT_STORE_GDTR,
+       INTERCEPT_STORE_LDTR,
+       INTERCEPT_STORE_TR,
+       INTERCEPT_LOAD_IDTR,
+       INTERCEPT_LOAD_GDTR,
+       INTERCEPT_LOAD_LDTR,
+       INTERCEPT_LOAD_TR,
+       INTERCEPT_RDTSC,
+       INTERCEPT_RDPMC,
+       INTERCEPT_PUSHF,
+       INTERCEPT_POPF,
+       INTERCEPT_CPUID,
+       INTERCEPT_RSM,
+       INTERCEPT_IRET,
+       INTERCEPT_INTn,
+       INTERCEPT_INVD,
+       INTERCEPT_PAUSE,
+       INTERCEPT_HLT,
+       INTERCEPT_INVLPG,
+       INTERCEPT_INVLPGA,
+       INTERCEPT_IOIO_PROT,
+       INTERCEPT_MSR_PROT,
+       INTERCEPT_TASK_SWITCH,
+       INTERCEPT_FERR_FREEZE,
+       INTERCEPT_SHUTDOWN,
+       INTERCEPT_VMRUN,
+       INTERCEPT_VMMCALL,
+       INTERCEPT_VMLOAD,
+       INTERCEPT_VMSAVE,
+       INTERCEPT_STGI,
+       INTERCEPT_CLGI,
+       INTERCEPT_SKINIT,
+       INTERCEPT_RDTSCP,
+       INTERCEPT_ICEBP,
+       INTERCEPT_WBINVD,
+       INTERCEPT_MONITOR,
+       INTERCEPT_MWAIT,
+       INTERCEPT_MWAIT_COND,
+       INTERCEPT_XSETBV,
+       INTERCEPT_RDPRU,
+};
+
+struct hv_vmcb_enlightenments {
+       struct __packed hv_enlightenments_control {
+               u32 nested_flush_hypercall:1;
+               u32 msr_bitmap:1;
+               u32 enlightened_npt_tlb: 1;
+               u32 reserved:29;
+       } __packed hv_enlightenments_control;
+       u32 hv_vp_id;
+       u64 hv_vm_id;
+       u64 partition_assist_page;
+       u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
+
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL                    0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH   (1)
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+       u32 intercept_cr;
+       u32 intercept_dr;
+       u32 intercept_exceptions;
+       u64 intercept;
+       u8 reserved_1[40];
+       u16 pause_filter_thresh;
+       u16 pause_filter_count;
+       u64 iopm_base_pa;
+       u64 msrpm_base_pa;
+       u64 tsc_offset;
+       u32 asid;
+       u8 tlb_ctl;
+       u8 reserved_2[3];
+       u32 int_ctl;
+       u32 int_vector;
+       u32 int_state;
+       u8 reserved_3[4];
+       u32 exit_code;
+       u32 exit_code_hi;
+       u64 exit_info_1;
+       u64 exit_info_2;
+       u32 exit_int_info;
+       u32 exit_int_info_err;
+       u64 nested_ctl;
+       u64 avic_vapic_bar;
+       u8 reserved_4[8];
+       u32 event_inj;
+       u32 event_inj_err;
+       u64 nested_cr3;
+       u64 virt_ext;
+       u32 clean;
+       u32 reserved_5;
+       u64 next_rip;
+       u8 insn_len;
+       u8 insn_bytes[15];
+       u64 avic_backing_page;  /* Offset 0xe0 */
+       u8 reserved_6[8];       /* Offset 0xe8 */
+       u64 avic_logical_id;    /* Offset 0xf0 */
+       u64 avic_physical_id;   /* Offset 0xf8 */
+       u8 reserved_7[8];
+       u64 vmsa_pa;            /* Used for an SEV-ES guest */
+       u8 reserved_8[720];
+       /*
+        * Offset 0x3e0, 32 bytes reserved
+        * for use by hypervisor/software.
+        */
+       union {
+               struct hv_vmcb_enlightenments hv_enlightenments;
+               u8 reserved_sw[32];
+       };
+};
+
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+#define TLB_CONTROL_FLUSH_ASID 3
+#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
+#define AVIC_ENABLE_SHIFT 31
+#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
+
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK   0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
+
+#define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+       u16 selector;
+       u16 attrib;
+       u32 limit;
+       u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+       struct vmcb_seg es;
+       struct vmcb_seg cs;
+       struct vmcb_seg ss;
+       struct vmcb_seg ds;
+       struct vmcb_seg fs;
+       struct vmcb_seg gs;
+       struct vmcb_seg gdtr;
+       struct vmcb_seg ldtr;
+       struct vmcb_seg idtr;
+       struct vmcb_seg tr;
+       u8 reserved_1[43];
+       u8 cpl;
+       u8 reserved_2[4];
+       u64 efer;
+       u8 reserved_3[112];
+       u64 cr4;
+       u64 cr3;
+       u64 cr0;
+       u64 dr7;
+       u64 dr6;
+       u64 rflags;
+       u64 rip;
+       u8 reserved_4[88];
+       u64 rsp;
+       u8 reserved_5[24];
+       u64 rax;
+       u64 star;
+       u64 lstar;
+       u64 cstar;
+       u64 sfmask;
+       u64 kernel_gs_base;
+       u64 sysenter_cs;
+       u64 sysenter_esp;
+       u64 sysenter_eip;
+       u64 cr2;
+       u8 reserved_6[32];
+       u64 g_pat;
+       u64 dbgctl;
+       u64 br_from;
+       u64 br_to;
+       u64 last_excp_from;
+       u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+       struct vmcb_control_area control;
+       struct vmcb_save_area save;
+};
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_READ     0
+#define INTERCEPT_CR3_READ     3
+#define INTERCEPT_CR4_READ     4
+#define INTERCEPT_CR8_READ     8
+#define INTERCEPT_CR0_WRITE    (16 + 0)
+#define INTERCEPT_CR3_WRITE    (16 + 3)
+#define INTERCEPT_CR4_WRITE    (16 + 4)
+#define INTERCEPT_CR8_WRITE    (16 + 8)
+
+#define INTERCEPT_DR0_READ     0
+#define INTERCEPT_DR1_READ     1
+#define INTERCEPT_DR2_READ     2
+#define INTERCEPT_DR3_READ     3
+#define INTERCEPT_DR4_READ     4
+#define INTERCEPT_DR5_READ     5
+#define INTERCEPT_DR6_READ     6
+#define INTERCEPT_DR7_READ     7
+#define INTERCEPT_DR0_WRITE    (16 + 0)
+#define INTERCEPT_DR1_WRITE    (16 + 1)
+#define INTERCEPT_DR2_WRITE    (16 + 2)
+#define INTERCEPT_DR3_WRITE    (16 + 3)
+#define INTERCEPT_DR4_WRITE    (16 + 4)
+#define INTERCEPT_DR5_WRITE    (16 + 5)
+#define INTERCEPT_DR6_WRITE    (16 + 6)
+#define INTERCEPT_DR7_WRITE    (16 + 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define        SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define        SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define        SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define        SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXITINFO_REG_MASK 0x0F
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#endif /* SELFTEST_KVM_SVM_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
new file mode 100644 (file)
index 0000000..b74c6dc
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#ifndef SELFTEST_KVM_SVM_UTILS_H
+#define SELFTEST_KVM_SVM_UTILS_H
+
+#include <asm/svm.h>
+
+#include <stdint.h>
+#include "svm.h"
+#include "processor.h"
+
+struct svm_test_data {
+       /* VMCB */
+       struct vmcb *vmcb; /* gva */
+       void *vmcb_hva;
+       uint64_t vmcb_gpa;
+
+       /* host state-save area */
+       struct vmcb_save_area *save_area; /* gva */
+       void *save_area_hva;
+       uint64_t save_area_gpa;
+
+       /* MSR-Bitmap */
+       void *msr; /* gva */
+       void *msr_hva;
+       uint64_t msr_gpa;
+};
+
+static inline void vmmcall(void)
+{
+       /*
+        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+        * use of this function is to exit to L1 from L2.  Clobber all other
+        * GPRs as L1 doesn't correctly preserve them during vmexits.
+        */
+       __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                              "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+#define stgi()                 \
+       __asm__ __volatile__(   \
+               "stgi\n"        \
+               )
+
+#define clgi()                 \
+       __asm__ __volatile__(   \
+               "clgi\n"        \
+               )
+
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+
+int open_sev_dev_path_or_exit(void);
+
+#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
new file mode 100644 (file)
index 0000000..d3825dc
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
new file mode 100644 (file)
index 0000000..edb3c39
--- /dev/null
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <asm/vmx.h>
+
+#include <stdint.h>
+#include "processor.h"
+#include "apic.h"
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_INTR_WINDOW_EXITING          0x00000004
+#define CPU_BASED_USE_TSC_OFFSETTING           0x00000008
+#define CPU_BASED_HLT_EXITING                  0x00000080
+#define CPU_BASED_INVLPG_EXITING               0x00000200
+#define CPU_BASED_MWAIT_EXITING                        0x00000400
+#define CPU_BASED_RDPMC_EXITING                        0x00000800
+#define CPU_BASED_RDTSC_EXITING                        0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING             0x00008000
+#define CPU_BASED_CR3_STORE_EXITING            0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING             0x00080000
+#define CPU_BASED_CR8_STORE_EXITING            0x00100000
+#define CPU_BASED_TPR_SHADOW                   0x00200000
+#define CPU_BASED_NMI_WINDOW_EXITING           0x00400000
+#define CPU_BASED_MOV_DR_EXITING               0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING            0x01000000
+#define CPU_BASED_USE_IO_BITMAPS               0x02000000
+#define CPU_BASED_MONITOR_TRAP                 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS              0x10000000
+#define CPU_BASED_MONITOR_EXITING              0x20000000
+#define CPU_BASED_PAUSE_EXITING                        0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS  0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT              0x00000002
+#define SECONDARY_EXEC_DESC                    0x00000004
+#define SECONDARY_EXEC_ENABLE_RDTSCP           0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE  0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID             0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT      0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY   0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING          0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC           0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS             0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING          0x00010000
+#define SECONDARY_EXEC_ENABLE_PML              0x00020000
+#define SECONDARY_EPT_VE                       0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE          0x00100000
+#define SECONDARY_EXEC_TSC_SCALING             0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK                        0x00000001
+#define PIN_BASED_NMI_EXITING                  0x00000008
+#define PIN_BASED_VIRTUAL_NMIS                 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER         0x00000040
+#define PIN_BASED_POSTED_INTR                  0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS            0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE           0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL     0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT               0x00008000
+#define VM_EXIT_SAVE_IA32_PAT                  0x00040000
+#define VM_EXIT_LOAD_IA32_PAT                  0x00080000
+#define VM_EXIT_SAVE_IA32_EFER                 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER                 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER      0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS           0x00000004
+#define VM_ENTRY_IA32E_MODE                    0x00000200
+#define VM_ENTRY_SMM                           0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR            0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL    0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT                 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER                        0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR     0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA                 0x00000020
+
+#define VMX_EPT_VPID_CAP_1G_PAGES              0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS               0x00200000
+
+#define EXIT_REASON_FAILED_VMENTRY     0x80000000
+
+enum vmcs_field {
+       VIRTUAL_PROCESSOR_ID            = 0x00000000,
+       POSTED_INTR_NV                  = 0x00000002,
+       GUEST_ES_SELECTOR               = 0x00000800,
+       GUEST_CS_SELECTOR               = 0x00000802,
+       GUEST_SS_SELECTOR               = 0x00000804,
+       GUEST_DS_SELECTOR               = 0x00000806,
+       GUEST_FS_SELECTOR               = 0x00000808,
+       GUEST_GS_SELECTOR               = 0x0000080a,
+       GUEST_LDTR_SELECTOR             = 0x0000080c,
+       GUEST_TR_SELECTOR               = 0x0000080e,
+       GUEST_INTR_STATUS               = 0x00000810,
+       GUEST_PML_INDEX                 = 0x00000812,
+       HOST_ES_SELECTOR                = 0x00000c00,
+       HOST_CS_SELECTOR                = 0x00000c02,
+       HOST_SS_SELECTOR                = 0x00000c04,
+       HOST_DS_SELECTOR                = 0x00000c06,
+       HOST_FS_SELECTOR                = 0x00000c08,
+       HOST_GS_SELECTOR                = 0x00000c0a,
+       HOST_TR_SELECTOR                = 0x00000c0c,
+       IO_BITMAP_A                     = 0x00002000,
+       IO_BITMAP_A_HIGH                = 0x00002001,
+       IO_BITMAP_B                     = 0x00002002,
+       IO_BITMAP_B_HIGH                = 0x00002003,
+       MSR_BITMAP                      = 0x00002004,
+       MSR_BITMAP_HIGH                 = 0x00002005,
+       VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+       VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+       VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+       VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+       VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+       VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+       PML_ADDRESS                     = 0x0000200e,
+       PML_ADDRESS_HIGH                = 0x0000200f,
+       TSC_OFFSET                      = 0x00002010,
+       TSC_OFFSET_HIGH                 = 0x00002011,
+       VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+       VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+       APIC_ACCESS_ADDR                = 0x00002014,
+       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+       POSTED_INTR_DESC_ADDR           = 0x00002016,
+       POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
+       EPT_POINTER                     = 0x0000201a,
+       EPT_POINTER_HIGH                = 0x0000201b,
+       EOI_EXIT_BITMAP0                = 0x0000201c,
+       EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
+       EOI_EXIT_BITMAP1                = 0x0000201e,
+       EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
+       EOI_EXIT_BITMAP2                = 0x00002020,
+       EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
+       EOI_EXIT_BITMAP3                = 0x00002022,
+       EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+       VMREAD_BITMAP                   = 0x00002026,
+       VMREAD_BITMAP_HIGH              = 0x00002027,
+       VMWRITE_BITMAP                  = 0x00002028,
+       VMWRITE_BITMAP_HIGH             = 0x00002029,
+       XSS_EXIT_BITMAP                 = 0x0000202C,
+       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
+       ENCLS_EXITING_BITMAP            = 0x0000202E,
+       ENCLS_EXITING_BITMAP_HIGH       = 0x0000202F,
+       TSC_MULTIPLIER                  = 0x00002032,
+       TSC_MULTIPLIER_HIGH             = 0x00002033,
+       GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+       GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+       VMCS_LINK_POINTER               = 0x00002800,
+       VMCS_LINK_POINTER_HIGH          = 0x00002801,
+       GUEST_IA32_DEBUGCTL             = 0x00002802,
+       GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+       GUEST_IA32_PAT                  = 0x00002804,
+       GUEST_IA32_PAT_HIGH             = 0x00002805,
+       GUEST_IA32_EFER                 = 0x00002806,
+       GUEST_IA32_EFER_HIGH            = 0x00002807,
+       GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+       GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+       GUEST_PDPTR0                    = 0x0000280a,
+       GUEST_PDPTR0_HIGH               = 0x0000280b,
+       GUEST_PDPTR1                    = 0x0000280c,
+       GUEST_PDPTR1_HIGH               = 0x0000280d,
+       GUEST_PDPTR2                    = 0x0000280e,
+       GUEST_PDPTR2_HIGH               = 0x0000280f,
+       GUEST_PDPTR3                    = 0x00002810,
+       GUEST_PDPTR3_HIGH               = 0x00002811,
+       GUEST_BNDCFGS                   = 0x00002812,
+       GUEST_BNDCFGS_HIGH              = 0x00002813,
+       HOST_IA32_PAT                   = 0x00002c00,
+       HOST_IA32_PAT_HIGH              = 0x00002c01,
+       HOST_IA32_EFER                  = 0x00002c02,
+       HOST_IA32_EFER_HIGH             = 0x00002c03,
+       HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+       HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+       PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+       CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+       EXCEPTION_BITMAP                = 0x00004004,
+       PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+       PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+       CR3_TARGET_COUNT                = 0x0000400a,
+       VM_EXIT_CONTROLS                = 0x0000400c,
+       VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+       VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+       VM_ENTRY_CONTROLS               = 0x00004012,
+       VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+       VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+       VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+       VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+       TPR_THRESHOLD                   = 0x0000401c,
+       SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+       PLE_GAP                         = 0x00004020,
+       PLE_WINDOW                      = 0x00004022,
+       VM_INSTRUCTION_ERROR            = 0x00004400,
+       VM_EXIT_REASON                  = 0x00004402,
+       VM_EXIT_INTR_INFO               = 0x00004404,
+       VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+       IDT_VECTORING_INFO_FIELD        = 0x00004408,
+       IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+       VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+       VMX_INSTRUCTION_INFO            = 0x0000440e,
+       GUEST_ES_LIMIT                  = 0x00004800,
+       GUEST_CS_LIMIT                  = 0x00004802,
+       GUEST_SS_LIMIT                  = 0x00004804,
+       GUEST_DS_LIMIT                  = 0x00004806,
+       GUEST_FS_LIMIT                  = 0x00004808,
+       GUEST_GS_LIMIT                  = 0x0000480a,
+       GUEST_LDTR_LIMIT                = 0x0000480c,
+       GUEST_TR_LIMIT                  = 0x0000480e,
+       GUEST_GDTR_LIMIT                = 0x00004810,
+       GUEST_IDTR_LIMIT                = 0x00004812,
+       GUEST_ES_AR_BYTES               = 0x00004814,
+       GUEST_CS_AR_BYTES               = 0x00004816,
+       GUEST_SS_AR_BYTES               = 0x00004818,
+       GUEST_DS_AR_BYTES               = 0x0000481a,
+       GUEST_FS_AR_BYTES               = 0x0000481c,
+       GUEST_GS_AR_BYTES               = 0x0000481e,
+       GUEST_LDTR_AR_BYTES             = 0x00004820,
+       GUEST_TR_AR_BYTES               = 0x00004822,
+       GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+       GUEST_ACTIVITY_STATE            = 0X00004826,
+       GUEST_SYSENTER_CS               = 0x0000482A,
+       VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+       HOST_IA32_SYSENTER_CS           = 0x00004c00,
+       CR0_GUEST_HOST_MASK             = 0x00006000,
+       CR4_GUEST_HOST_MASK             = 0x00006002,
+       CR0_READ_SHADOW                 = 0x00006004,
+       CR4_READ_SHADOW                 = 0x00006006,
+       CR3_TARGET_VALUE0               = 0x00006008,
+       CR3_TARGET_VALUE1               = 0x0000600a,
+       CR3_TARGET_VALUE2               = 0x0000600c,
+       CR3_TARGET_VALUE3               = 0x0000600e,
+       EXIT_QUALIFICATION              = 0x00006400,
+       GUEST_LINEAR_ADDRESS            = 0x0000640a,
+       GUEST_CR0                       = 0x00006800,
+       GUEST_CR3                       = 0x00006802,
+       GUEST_CR4                       = 0x00006804,
+       GUEST_ES_BASE                   = 0x00006806,
+       GUEST_CS_BASE                   = 0x00006808,
+       GUEST_SS_BASE                   = 0x0000680a,
+       GUEST_DS_BASE                   = 0x0000680c,
+       GUEST_FS_BASE                   = 0x0000680e,
+       GUEST_GS_BASE                   = 0x00006810,
+       GUEST_LDTR_BASE                 = 0x00006812,
+       GUEST_TR_BASE                   = 0x00006814,
+       GUEST_GDTR_BASE                 = 0x00006816,
+       GUEST_IDTR_BASE                 = 0x00006818,
+       GUEST_DR7                       = 0x0000681a,
+       GUEST_RSP                       = 0x0000681c,
+       GUEST_RIP                       = 0x0000681e,
+       GUEST_RFLAGS                    = 0x00006820,
+       GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+       GUEST_SYSENTER_ESP              = 0x00006824,
+       GUEST_SYSENTER_EIP              = 0x00006826,
+       HOST_CR0                        = 0x00006c00,
+       HOST_CR3                        = 0x00006c02,
+       HOST_CR4                        = 0x00006c04,
+       HOST_FS_BASE                    = 0x00006c06,
+       HOST_GS_BASE                    = 0x00006c08,
+       HOST_TR_BASE                    = 0x00006c0a,
+       HOST_GDTR_BASE                  = 0x00006c0c,
+       HOST_IDTR_BASE                  = 0x00006c0e,
+       HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+       HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+       HOST_RSP                        = 0x00006c14,
+       HOST_RIP                        = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+       uint32_t index;
+       uint32_t reserved;
+       uint64_t value;
+} __attribute__ ((aligned(16)));
+
+#include "evmcs.h"
+
+static inline int vmxon(uint64_t phys)
+{
+       uint8_t ret;
+
+       __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(phys)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline void vmxoff(void)
+{
+       __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+       uint8_t ret;
+
+       __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(vmcs_pa)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return -1;
+
+       __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [pa]"m"(vmcs_pa)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+       uint64_t tmp;
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmptrst(value);
+
+       __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+               : [value]"=m"(tmp), [ret]"=rm"(ret)
+               : : "cc", "memory");
+
+       *value = tmp;
+       return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+       uint64_t value = 0;
+       vmptrst(&value);
+       return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+       int ret;
+
+       if (enable_evmcs)
+               return evmcs_vmlaunch();
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "vmwrite %%rsp, %[host_rsp];"
+                            "lea 1f(%%rip), %%rax;"
+                            "vmwrite %%rax, %[host_rip];"
+                            "vmlaunch;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"((uint64_t)HOST_RSP),
+                              [host_rip]"r"((uint64_t)HOST_RIP)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+       int ret;
+
+       if (enable_evmcs)
+               return evmcs_vmresume();
+
+       __asm__ __volatile__("push %%rbp;"
+                            "push %%rcx;"
+                            "push %%rdx;"
+                            "push %%rsi;"
+                            "push %%rdi;"
+                            "push $0;"
+                            "vmwrite %%rsp, %[host_rsp];"
+                            "lea 1f(%%rip), %%rax;"
+                            "vmwrite %%rax, %[host_rip];"
+                            "vmresume;"
+                            "incq (%%rsp);"
+                            "1: pop %%rax;"
+                            "pop %%rdi;"
+                            "pop %%rsi;"
+                            "pop %%rdx;"
+                            "pop %%rcx;"
+                            "pop %%rbp;"
+                            : [ret]"=&a"(ret)
+                            : [host_rsp]"r"((uint64_t)HOST_RSP),
+                              [host_rip]"r"((uint64_t)HOST_RIP)
+                            : "memory", "cc", "rbx", "r8", "r9", "r10",
+                              "r11", "r12", "r13", "r14", "r15");
+       return ret;
+}
+
+static inline void vmcall(void)
+{
+       /*
+        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+        * use of this function is to exit to L1 from L2.  Clobber all other
+        * GPRs as L1 doesn't correctly preserve them during vmexits.
+        */
+       __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                              "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+       uint64_t tmp;
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmread(encoding, value);
+
+       __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+               : [value]"=rm"(tmp), [ret]"=rm"(ret)
+               : [encoding]"r"(encoding)
+               : "cc", "memory");
+
+       *value = tmp;
+       return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+       uint64_t value = 0;
+       vmread(encoding, &value);
+       return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+       uint8_t ret;
+
+       if (enable_evmcs)
+               return evmcs_vmwrite(encoding, value);
+
+       __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+               : [ret]"=rm"(ret)
+               : [value]"rm"(value), [encoding]"r"(encoding)
+               : "cc", "memory");
+
+       return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+       return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+struct vmx_pages {
+       void *vmxon_hva;
+       uint64_t vmxon_gpa;
+       void *vmxon;
+
+       void *vmcs_hva;
+       uint64_t vmcs_gpa;
+       void *vmcs;
+
+       void *msr_hva;
+       uint64_t msr_gpa;
+       void *msr;
+
+       void *shadow_vmcs_hva;
+       uint64_t shadow_vmcs_gpa;
+       void *shadow_vmcs;
+
+       void *vmread_hva;
+       uint64_t vmread_gpa;
+       void *vmread;
+
+       void *vmwrite_hva;
+       uint64_t vmwrite_gpa;
+       void *vmwrite;
+
+       void *eptp_hva;
+       uint64_t eptp_gpa;
+       void *eptp;
+
+       void *apic_access_hva;
+       uint64_t apic_access_gpa;
+       void *apic_access;
+};
+
+union vmx_basic {
+       u64 val;
+       struct {
+               u32 revision;
+               u32     size:13,
+                       reserved1:3,
+                       width:1,
+                       dual:1,
+                       type:4,
+                       insouts:1,
+                       ctrl:1,
+                       vm_entry_exception_ctrl:1,
+                       reserved2:7;
+       };
+};
+
+union vmx_ctrl_msr {
+       u64 val;
+       struct {
+               u32 set, clr;
+       };
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
+
+bool ept_1g_pages_supported(void);
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr);
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                uint64_t nested_paddr, uint64_t paddr, uint64_t size);
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+                           uint64_t addr, uint64_t size);
+bool kvm_cpu_has_ept(void);
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_VMX_H */
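
Note (not part of the patch): a minimal sketch of how nested tests typically drive this header from L1 guest code, assuming GUEST_ASSERT()/GUEST_DONE() from ucall_common.h and EXIT_REASON_VMCALL from processor.h; the vmx_pages argument is the structure allocated host-side via vcpu_alloc_vmx():

/*
 * Sketch only: enter VMX operation, load the VMCS, launch an L2 that
 * immediately exits with VMCALL, and confirm the exit reason via vmreadz().
 */
#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code(void)
{
	vmcall();	/* exit back to L1 */
}

static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));
	prepare_vmcs(vmx, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	GUEST_DONE();
}
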
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
deleted file mode 100644 (file)
index 5199009..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/apic.h
- *
- * Copyright (C) 2021, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_APIC_H
-#define SELFTEST_KVM_APIC_H
-
-#include <stdint.h>
-
-#include "processor.h"
-#include "ucall_common.h"
-
-#define APIC_DEFAULT_GPA               0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE              0x0000001b
-#define MSR_IA32_APICBASE_BSP          (1<<8)
-#define MSR_IA32_APICBASE_EXTD         (1<<10)
-#define MSR_IA32_APICBASE_ENABLE       (1<<11)
-#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
-#define                GET_APIC_BASE(x)        (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR  0x800
-#define X2APIC_ENABLE  (1UL << 10)
-#define        APIC_ID         0x20
-#define        APIC_LVR        0x30
-#define                GET_APIC_ID_FIELD(x)    (((x) >> 24) & 0xFF)
-#define        APIC_TASKPRI    0x80
-#define        APIC_PROCPRI    0xA0
-#define        APIC_EOI        0xB0
-#define        APIC_SPIV       0xF0
-#define                APIC_SPIV_FOCUS_DISABLED        (1 << 9)
-#define                APIC_SPIV_APIC_ENABLED          (1 << 8)
-#define APIC_IRR       0x200
-#define        APIC_ICR        0x300
-#define        APIC_LVTCMCI    0x2f0
-#define                APIC_DEST_SELF          0x40000
-#define                APIC_DEST_ALLINC        0x80000
-#define                APIC_DEST_ALLBUT        0xC0000
-#define                APIC_ICR_RR_MASK        0x30000
-#define                APIC_ICR_RR_INVALID     0x00000
-#define                APIC_ICR_RR_INPROG      0x10000
-#define                APIC_ICR_RR_VALID       0x20000
-#define                APIC_INT_LEVELTRIG      0x08000
-#define                APIC_INT_ASSERT         0x04000
-#define                APIC_ICR_BUSY           0x01000
-#define                APIC_DEST_LOGICAL       0x00800
-#define                APIC_DEST_PHYSICAL      0x00000
-#define                APIC_DM_FIXED           0x00000
-#define                APIC_DM_FIXED_MASK      0x00700
-#define                APIC_DM_LOWEST          0x00100
-#define                APIC_DM_SMI             0x00200
-#define                APIC_DM_REMRD           0x00300
-#define                APIC_DM_NMI             0x00400
-#define                APIC_DM_INIT            0x00500
-#define                APIC_DM_STARTUP         0x00600
-#define                APIC_DM_EXTINT          0x00700
-#define                APIC_VECTOR_MASK        0x000FF
-#define        APIC_ICR2       0x310
-#define                SET_APIC_DEST_FIELD(x)  ((x) << 24)
-#define APIC_LVTT      0x320
-#define                APIC_LVT_TIMER_ONESHOT          (0 << 17)
-#define                APIC_LVT_TIMER_PERIODIC         (1 << 17)
-#define                APIC_LVT_TIMER_TSCDEADLINE      (2 << 17)
-#define                APIC_LVT_MASKED                 (1 << 16)
-#define        APIC_TMICT      0x380
-#define        APIC_TMCCT      0x390
-#define        APIC_TDCR       0x3E0
-
-void apic_disable(void);
-void xapic_enable(void);
-void x2apic_enable(void);
-
-static inline uint32_t get_bsp_flag(void)
-{
-       return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
-static inline uint32_t xapic_read_reg(unsigned int reg)
-{
-       return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
-}
-
-static inline void xapic_write_reg(unsigned int reg, uint32_t val)
-{
-       ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
-}
-
-static inline uint64_t x2apic_read_reg(unsigned int reg)
-{
-       return rdmsr(APIC_BASE_MSR + (reg >> 4));
-}
-
-static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
-{
-       return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
-{
-       uint8_t fault = x2apic_write_reg_safe(reg, value);
-
-       __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
-                      fault, APIC_BASE_MSR + (reg >> 4), value);
-}
-
-static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
-{
-       uint8_t fault = x2apic_write_reg_safe(reg, value);
-
-       __GUEST_ASSERT(fault == GP_VECTOR,
-                      "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
-                      APIC_BASE_MSR + (reg >> 4), value, fault);
-}
-
-
-#endif /* SELFTEST_KVM_APIC_H */
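
Note (not part of the patch): the accessors declared above are used unchanged from guest code; a hedged sketch of the usual idiom, assuming GUEST_ASSERT() from ucall_common.h and a vCPU ID that fits in the xAPIC's 8-bit ID field:

/*
 * Sketch only: read the local APIC ID through the xAPIC MMIO window
 * (ID lives in bits 31:24 of the APIC_ID register) and again through the
 * x2APIC MSR interface (full 32-bit ID), and check both against the
 * expected value.
 */
static void guest_check_apic_id(uint32_t expected_id)
{
	uint32_t xapic_id, x2apic_id;

	xapic_enable();
	xapic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));

	x2apic_enable();
	x2apic_id = (uint32_t)x2apic_read_reg(APIC_ID);

	GUEST_ASSERT(xapic_id == expected_id);
	GUEST_ASSERT(x2apic_id == expected_id);
}
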
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
deleted file mode 100644 (file)
index 901caf0..0000000
+++ /dev/null
@@ -1,1279 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/evmcs.h
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_EVMCS_H
-#define SELFTEST_KVM_EVMCS_H
-
-#include <stdint.h>
-#include "hyperv.h"
-#include "vmx.h"
-
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#define EVMCS_VERSION 1
-
-extern bool enable_evmcs;
-
-struct hv_enlightened_vmcs {
-       u32 revision_id;
-       u32 abort;
-
-       u16 host_es_selector;
-       u16 host_cs_selector;
-       u16 host_ss_selector;
-       u16 host_ds_selector;
-       u16 host_fs_selector;
-       u16 host_gs_selector;
-       u16 host_tr_selector;
-
-       u16 padding16_1;
-
-       u64 host_ia32_pat;
-       u64 host_ia32_efer;
-
-       u64 host_cr0;
-       u64 host_cr3;
-       u64 host_cr4;
-
-       u64 host_ia32_sysenter_esp;
-       u64 host_ia32_sysenter_eip;
-       u64 host_rip;
-       u32 host_ia32_sysenter_cs;
-
-       u32 pin_based_vm_exec_control;
-       u32 vm_exit_controls;
-       u32 secondary_vm_exec_control;
-
-       u64 io_bitmap_a;
-       u64 io_bitmap_b;
-       u64 msr_bitmap;
-
-       u16 guest_es_selector;
-       u16 guest_cs_selector;
-       u16 guest_ss_selector;
-       u16 guest_ds_selector;
-       u16 guest_fs_selector;
-       u16 guest_gs_selector;
-       u16 guest_ldtr_selector;
-       u16 guest_tr_selector;
-
-       u32 guest_es_limit;
-       u32 guest_cs_limit;
-       u32 guest_ss_limit;
-       u32 guest_ds_limit;
-       u32 guest_fs_limit;
-       u32 guest_gs_limit;
-       u32 guest_ldtr_limit;
-       u32 guest_tr_limit;
-       u32 guest_gdtr_limit;
-       u32 guest_idtr_limit;
-
-       u32 guest_es_ar_bytes;
-       u32 guest_cs_ar_bytes;
-       u32 guest_ss_ar_bytes;
-       u32 guest_ds_ar_bytes;
-       u32 guest_fs_ar_bytes;
-       u32 guest_gs_ar_bytes;
-       u32 guest_ldtr_ar_bytes;
-       u32 guest_tr_ar_bytes;
-
-       u64 guest_es_base;
-       u64 guest_cs_base;
-       u64 guest_ss_base;
-       u64 guest_ds_base;
-       u64 guest_fs_base;
-       u64 guest_gs_base;
-       u64 guest_ldtr_base;
-       u64 guest_tr_base;
-       u64 guest_gdtr_base;
-       u64 guest_idtr_base;
-
-       u64 padding64_1[3];
-
-       u64 vm_exit_msr_store_addr;
-       u64 vm_exit_msr_load_addr;
-       u64 vm_entry_msr_load_addr;
-
-       u64 cr3_target_value0;
-       u64 cr3_target_value1;
-       u64 cr3_target_value2;
-       u64 cr3_target_value3;
-
-       u32 page_fault_error_code_mask;
-       u32 page_fault_error_code_match;
-
-       u32 cr3_target_count;
-       u32 vm_exit_msr_store_count;
-       u32 vm_exit_msr_load_count;
-       u32 vm_entry_msr_load_count;
-
-       u64 tsc_offset;
-       u64 virtual_apic_page_addr;
-       u64 vmcs_link_pointer;
-
-       u64 guest_ia32_debugctl;
-       u64 guest_ia32_pat;
-       u64 guest_ia32_efer;
-
-       u64 guest_pdptr0;
-       u64 guest_pdptr1;
-       u64 guest_pdptr2;
-       u64 guest_pdptr3;
-
-       u64 guest_pending_dbg_exceptions;
-       u64 guest_sysenter_esp;
-       u64 guest_sysenter_eip;
-
-       u32 guest_activity_state;
-       u32 guest_sysenter_cs;
-
-       u64 cr0_guest_host_mask;
-       u64 cr4_guest_host_mask;
-       u64 cr0_read_shadow;
-       u64 cr4_read_shadow;
-       u64 guest_cr0;
-       u64 guest_cr3;
-       u64 guest_cr4;
-       u64 guest_dr7;
-
-       u64 host_fs_base;
-       u64 host_gs_base;
-       u64 host_tr_base;
-       u64 host_gdtr_base;
-       u64 host_idtr_base;
-       u64 host_rsp;
-
-       u64 ept_pointer;
-
-       u16 virtual_processor_id;
-       u16 padding16_2[3];
-
-       u64 padding64_2[5];
-       u64 guest_physical_address;
-
-       u32 vm_instruction_error;
-       u32 vm_exit_reason;
-       u32 vm_exit_intr_info;
-       u32 vm_exit_intr_error_code;
-       u32 idt_vectoring_info_field;
-       u32 idt_vectoring_error_code;
-       u32 vm_exit_instruction_len;
-       u32 vmx_instruction_info;
-
-       u64 exit_qualification;
-       u64 exit_io_instruction_ecx;
-       u64 exit_io_instruction_esi;
-       u64 exit_io_instruction_edi;
-       u64 exit_io_instruction_eip;
-
-       u64 guest_linear_address;
-       u64 guest_rsp;
-       u64 guest_rflags;
-
-       u32 guest_interruptibility_info;
-       u32 cpu_based_vm_exec_control;
-       u32 exception_bitmap;
-       u32 vm_entry_controls;
-       u32 vm_entry_intr_info_field;
-       u32 vm_entry_exception_error_code;
-       u32 vm_entry_instruction_len;
-       u32 tpr_threshold;
-
-       u64 guest_rip;
-
-       u32 hv_clean_fields;
-       u32 padding32_1;
-       u32 hv_synthetic_controls;
-       struct {
-               u32 nested_flush_hypercall:1;
-               u32 msr_bitmap:1;
-               u32 reserved:30;
-       }  __packed hv_enlightenments_control;
-       u32 hv_vp_id;
-       u32 padding32_2;
-       u64 hv_vm_id;
-       u64 partition_assist_page;
-       u64 padding64_4[4];
-       u64 guest_bndcfgs;
-       u64 guest_ia32_perf_global_ctrl;
-       u64 guest_ia32_s_cet;
-       u64 guest_ssp;
-       u64 guest_ia32_int_ssp_table_addr;
-       u64 guest_ia32_lbr_ctl;
-       u64 padding64_5[2];
-       u64 xss_exit_bitmap;
-       u64 encls_exiting_bitmap;
-       u64 host_ia32_perf_global_ctrl;
-       u64 tsc_multiplier;
-       u64 host_ia32_s_cet;
-       u64 host_ssp;
-       u64 host_ia32_int_ssp_table_addr;
-       u64 padding64_6;
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP               BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2             BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1             BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC             BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT            BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY            BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN            BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR                     BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT             BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC              BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1               BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2               BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER             BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1                BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
-
-#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
-
-extern struct hv_enlightened_vmcs *current_evmcs;
-
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
-
-static inline void evmcs_enable(void)
-{
-       enable_evmcs = true;
-}
-
-static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
-{
-       current_vp_assist->current_nested_vmcs = vmcs_pa;
-       current_vp_assist->enlighten_vmentry = 1;
-
-       current_evmcs = vmcs;
-
-       return 0;
-}
-
-static inline bool load_evmcs(struct hyperv_test_pages *hv)
-{
-       if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
-               return false;
-
-       current_evmcs->revision_id = EVMCS_VERSION;
-
-       return true;
-}
-
-static inline int evmcs_vmptrst(uint64_t *value)
-{
-       *value = current_vp_assist->current_nested_vmcs &
-               ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-       return 0;
-}
-
-static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
-{
-       switch (encoding) {
-       case GUEST_RIP:
-               *value = current_evmcs->guest_rip;
-               break;
-       case GUEST_RSP:
-               *value = current_evmcs->guest_rsp;
-               break;
-       case GUEST_RFLAGS:
-               *value = current_evmcs->guest_rflags;
-               break;
-       case HOST_IA32_PAT:
-               *value = current_evmcs->host_ia32_pat;
-               break;
-       case HOST_IA32_EFER:
-               *value = current_evmcs->host_ia32_efer;
-               break;
-       case HOST_CR0:
-               *value = current_evmcs->host_cr0;
-               break;
-       case HOST_CR3:
-               *value = current_evmcs->host_cr3;
-               break;
-       case HOST_CR4:
-               *value = current_evmcs->host_cr4;
-               break;
-       case HOST_IA32_SYSENTER_ESP:
-               *value = current_evmcs->host_ia32_sysenter_esp;
-               break;
-       case HOST_IA32_SYSENTER_EIP:
-               *value = current_evmcs->host_ia32_sysenter_eip;
-               break;
-       case HOST_RIP:
-               *value = current_evmcs->host_rip;
-               break;
-       case IO_BITMAP_A:
-               *value = current_evmcs->io_bitmap_a;
-               break;
-       case IO_BITMAP_B:
-               *value = current_evmcs->io_bitmap_b;
-               break;
-       case MSR_BITMAP:
-               *value = current_evmcs->msr_bitmap;
-               break;
-       case GUEST_ES_BASE:
-               *value = current_evmcs->guest_es_base;
-               break;
-       case GUEST_CS_BASE:
-               *value = current_evmcs->guest_cs_base;
-               break;
-       case GUEST_SS_BASE:
-               *value = current_evmcs->guest_ss_base;
-               break;
-       case GUEST_DS_BASE:
-               *value = current_evmcs->guest_ds_base;
-               break;
-       case GUEST_FS_BASE:
-               *value = current_evmcs->guest_fs_base;
-               break;
-       case GUEST_GS_BASE:
-               *value = current_evmcs->guest_gs_base;
-               break;
-       case GUEST_LDTR_BASE:
-               *value = current_evmcs->guest_ldtr_base;
-               break;
-       case GUEST_TR_BASE:
-               *value = current_evmcs->guest_tr_base;
-               break;
-       case GUEST_GDTR_BASE:
-               *value = current_evmcs->guest_gdtr_base;
-               break;
-       case GUEST_IDTR_BASE:
-               *value = current_evmcs->guest_idtr_base;
-               break;
-       case TSC_OFFSET:
-               *value = current_evmcs->tsc_offset;
-               break;
-       case VIRTUAL_APIC_PAGE_ADDR:
-               *value = current_evmcs->virtual_apic_page_addr;
-               break;
-       case VMCS_LINK_POINTER:
-               *value = current_evmcs->vmcs_link_pointer;
-               break;
-       case GUEST_IA32_DEBUGCTL:
-               *value = current_evmcs->guest_ia32_debugctl;
-               break;
-       case GUEST_IA32_PAT:
-               *value = current_evmcs->guest_ia32_pat;
-               break;
-       case GUEST_IA32_EFER:
-               *value = current_evmcs->guest_ia32_efer;
-               break;
-       case GUEST_PDPTR0:
-               *value = current_evmcs->guest_pdptr0;
-               break;
-       case GUEST_PDPTR1:
-               *value = current_evmcs->guest_pdptr1;
-               break;
-       case GUEST_PDPTR2:
-               *value = current_evmcs->guest_pdptr2;
-               break;
-       case GUEST_PDPTR3:
-               *value = current_evmcs->guest_pdptr3;
-               break;
-       case GUEST_PENDING_DBG_EXCEPTIONS:
-               *value = current_evmcs->guest_pending_dbg_exceptions;
-               break;
-       case GUEST_SYSENTER_ESP:
-               *value = current_evmcs->guest_sysenter_esp;
-               break;
-       case GUEST_SYSENTER_EIP:
-               *value = current_evmcs->guest_sysenter_eip;
-               break;
-       case CR0_GUEST_HOST_MASK:
-               *value = current_evmcs->cr0_guest_host_mask;
-               break;
-       case CR4_GUEST_HOST_MASK:
-               *value = current_evmcs->cr4_guest_host_mask;
-               break;
-       case CR0_READ_SHADOW:
-               *value = current_evmcs->cr0_read_shadow;
-               break;
-       case CR4_READ_SHADOW:
-               *value = current_evmcs->cr4_read_shadow;
-               break;
-       case GUEST_CR0:
-               *value = current_evmcs->guest_cr0;
-               break;
-       case GUEST_CR3:
-               *value = current_evmcs->guest_cr3;
-               break;
-       case GUEST_CR4:
-               *value = current_evmcs->guest_cr4;
-               break;
-       case GUEST_DR7:
-               *value = current_evmcs->guest_dr7;
-               break;
-       case HOST_FS_BASE:
-               *value = current_evmcs->host_fs_base;
-               break;
-       case HOST_GS_BASE:
-               *value = current_evmcs->host_gs_base;
-               break;
-       case HOST_TR_BASE:
-               *value = current_evmcs->host_tr_base;
-               break;
-       case HOST_GDTR_BASE:
-               *value = current_evmcs->host_gdtr_base;
-               break;
-       case HOST_IDTR_BASE:
-               *value = current_evmcs->host_idtr_base;
-               break;
-       case HOST_RSP:
-               *value = current_evmcs->host_rsp;
-               break;
-       case EPT_POINTER:
-               *value = current_evmcs->ept_pointer;
-               break;
-       case GUEST_BNDCFGS:
-               *value = current_evmcs->guest_bndcfgs;
-               break;
-       case XSS_EXIT_BITMAP:
-               *value = current_evmcs->xss_exit_bitmap;
-               break;
-       case GUEST_PHYSICAL_ADDRESS:
-               *value = current_evmcs->guest_physical_address;
-               break;
-       case EXIT_QUALIFICATION:
-               *value = current_evmcs->exit_qualification;
-               break;
-       case GUEST_LINEAR_ADDRESS:
-               *value = current_evmcs->guest_linear_address;
-               break;
-       case VM_EXIT_MSR_STORE_ADDR:
-               *value = current_evmcs->vm_exit_msr_store_addr;
-               break;
-       case VM_EXIT_MSR_LOAD_ADDR:
-               *value = current_evmcs->vm_exit_msr_load_addr;
-               break;
-       case VM_ENTRY_MSR_LOAD_ADDR:
-               *value = current_evmcs->vm_entry_msr_load_addr;
-               break;
-       case CR3_TARGET_VALUE0:
-               *value = current_evmcs->cr3_target_value0;
-               break;
-       case CR3_TARGET_VALUE1:
-               *value = current_evmcs->cr3_target_value1;
-               break;
-       case CR3_TARGET_VALUE2:
-               *value = current_evmcs->cr3_target_value2;
-               break;
-       case CR3_TARGET_VALUE3:
-               *value = current_evmcs->cr3_target_value3;
-               break;
-       case TPR_THRESHOLD:
-               *value = current_evmcs->tpr_threshold;
-               break;
-       case GUEST_INTERRUPTIBILITY_INFO:
-               *value = current_evmcs->guest_interruptibility_info;
-               break;
-       case CPU_BASED_VM_EXEC_CONTROL:
-               *value = current_evmcs->cpu_based_vm_exec_control;
-               break;
-       case EXCEPTION_BITMAP:
-               *value = current_evmcs->exception_bitmap;
-               break;
-       case VM_ENTRY_CONTROLS:
-               *value = current_evmcs->vm_entry_controls;
-               break;
-       case VM_ENTRY_INTR_INFO_FIELD:
-               *value = current_evmcs->vm_entry_intr_info_field;
-               break;
-       case VM_ENTRY_EXCEPTION_ERROR_CODE:
-               *value = current_evmcs->vm_entry_exception_error_code;
-               break;
-       case VM_ENTRY_INSTRUCTION_LEN:
-               *value = current_evmcs->vm_entry_instruction_len;
-               break;
-       case HOST_IA32_SYSENTER_CS:
-               *value = current_evmcs->host_ia32_sysenter_cs;
-               break;
-       case PIN_BASED_VM_EXEC_CONTROL:
-               *value = current_evmcs->pin_based_vm_exec_control;
-               break;
-       case VM_EXIT_CONTROLS:
-               *value = current_evmcs->vm_exit_controls;
-               break;
-       case SECONDARY_VM_EXEC_CONTROL:
-               *value = current_evmcs->secondary_vm_exec_control;
-               break;
-       case GUEST_ES_LIMIT:
-               *value = current_evmcs->guest_es_limit;
-               break;
-       case GUEST_CS_LIMIT:
-               *value = current_evmcs->guest_cs_limit;
-               break;
-       case GUEST_SS_LIMIT:
-               *value = current_evmcs->guest_ss_limit;
-               break;
-       case GUEST_DS_LIMIT:
-               *value = current_evmcs->guest_ds_limit;
-               break;
-       case GUEST_FS_LIMIT:
-               *value = current_evmcs->guest_fs_limit;
-               break;
-       case GUEST_GS_LIMIT:
-               *value = current_evmcs->guest_gs_limit;
-               break;
-       case GUEST_LDTR_LIMIT:
-               *value = current_evmcs->guest_ldtr_limit;
-               break;
-       case GUEST_TR_LIMIT:
-               *value = current_evmcs->guest_tr_limit;
-               break;
-       case GUEST_GDTR_LIMIT:
-               *value = current_evmcs->guest_gdtr_limit;
-               break;
-       case GUEST_IDTR_LIMIT:
-               *value = current_evmcs->guest_idtr_limit;
-               break;
-       case GUEST_ES_AR_BYTES:
-               *value = current_evmcs->guest_es_ar_bytes;
-               break;
-       case GUEST_CS_AR_BYTES:
-               *value = current_evmcs->guest_cs_ar_bytes;
-               break;
-       case GUEST_SS_AR_BYTES:
-               *value = current_evmcs->guest_ss_ar_bytes;
-               break;
-       case GUEST_DS_AR_BYTES:
-               *value = current_evmcs->guest_ds_ar_bytes;
-               break;
-       case GUEST_FS_AR_BYTES:
-               *value = current_evmcs->guest_fs_ar_bytes;
-               break;
-       case GUEST_GS_AR_BYTES:
-               *value = current_evmcs->guest_gs_ar_bytes;
-               break;
-       case GUEST_LDTR_AR_BYTES:
-               *value = current_evmcs->guest_ldtr_ar_bytes;
-               break;
-       case GUEST_TR_AR_BYTES:
-               *value = current_evmcs->guest_tr_ar_bytes;
-               break;
-       case GUEST_ACTIVITY_STATE:
-               *value = current_evmcs->guest_activity_state;
-               break;
-       case GUEST_SYSENTER_CS:
-               *value = current_evmcs->guest_sysenter_cs;
-               break;
-       case VM_INSTRUCTION_ERROR:
-               *value = current_evmcs->vm_instruction_error;
-               break;
-       case VM_EXIT_REASON:
-               *value = current_evmcs->vm_exit_reason;
-               break;
-       case VM_EXIT_INTR_INFO:
-               *value = current_evmcs->vm_exit_intr_info;
-               break;
-       case VM_EXIT_INTR_ERROR_CODE:
-               *value = current_evmcs->vm_exit_intr_error_code;
-               break;
-       case IDT_VECTORING_INFO_FIELD:
-               *value = current_evmcs->idt_vectoring_info_field;
-               break;
-       case IDT_VECTORING_ERROR_CODE:
-               *value = current_evmcs->idt_vectoring_error_code;
-               break;
-       case VM_EXIT_INSTRUCTION_LEN:
-               *value = current_evmcs->vm_exit_instruction_len;
-               break;
-       case VMX_INSTRUCTION_INFO:
-               *value = current_evmcs->vmx_instruction_info;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MASK:
-               *value = current_evmcs->page_fault_error_code_mask;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MATCH:
-               *value = current_evmcs->page_fault_error_code_match;
-               break;
-       case CR3_TARGET_COUNT:
-               *value = current_evmcs->cr3_target_count;
-               break;
-       case VM_EXIT_MSR_STORE_COUNT:
-               *value = current_evmcs->vm_exit_msr_store_count;
-               break;
-       case VM_EXIT_MSR_LOAD_COUNT:
-               *value = current_evmcs->vm_exit_msr_load_count;
-               break;
-       case VM_ENTRY_MSR_LOAD_COUNT:
-               *value = current_evmcs->vm_entry_msr_load_count;
-               break;
-       case HOST_ES_SELECTOR:
-               *value = current_evmcs->host_es_selector;
-               break;
-       case HOST_CS_SELECTOR:
-               *value = current_evmcs->host_cs_selector;
-               break;
-       case HOST_SS_SELECTOR:
-               *value = current_evmcs->host_ss_selector;
-               break;
-       case HOST_DS_SELECTOR:
-               *value = current_evmcs->host_ds_selector;
-               break;
-       case HOST_FS_SELECTOR:
-               *value = current_evmcs->host_fs_selector;
-               break;
-       case HOST_GS_SELECTOR:
-               *value = current_evmcs->host_gs_selector;
-               break;
-       case HOST_TR_SELECTOR:
-               *value = current_evmcs->host_tr_selector;
-               break;
-       case GUEST_ES_SELECTOR:
-               *value = current_evmcs->guest_es_selector;
-               break;
-       case GUEST_CS_SELECTOR:
-               *value = current_evmcs->guest_cs_selector;
-               break;
-       case GUEST_SS_SELECTOR:
-               *value = current_evmcs->guest_ss_selector;
-               break;
-       case GUEST_DS_SELECTOR:
-               *value = current_evmcs->guest_ds_selector;
-               break;
-       case GUEST_FS_SELECTOR:
-               *value = current_evmcs->guest_fs_selector;
-               break;
-       case GUEST_GS_SELECTOR:
-               *value = current_evmcs->guest_gs_selector;
-               break;
-       case GUEST_LDTR_SELECTOR:
-               *value = current_evmcs->guest_ldtr_selector;
-               break;
-       case GUEST_TR_SELECTOR:
-               *value = current_evmcs->guest_tr_selector;
-               break;
-       case VIRTUAL_PROCESSOR_ID:
-               *value = current_evmcs->virtual_processor_id;
-               break;
-       case HOST_IA32_PERF_GLOBAL_CTRL:
-               *value = current_evmcs->host_ia32_perf_global_ctrl;
-               break;
-       case GUEST_IA32_PERF_GLOBAL_CTRL:
-               *value = current_evmcs->guest_ia32_perf_global_ctrl;
-               break;
-       case ENCLS_EXITING_BITMAP:
-               *value = current_evmcs->encls_exiting_bitmap;
-               break;
-       case TSC_MULTIPLIER:
-               *value = current_evmcs->tsc_multiplier;
-               break;
-       default: return 1;
-       }
-
-       return 0;
-}
-
-static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
-{
-       switch (encoding) {
-       case GUEST_RIP:
-               current_evmcs->guest_rip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_RSP:
-               current_evmcs->guest_rsp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case GUEST_RFLAGS:
-               current_evmcs->guest_rflags = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case HOST_IA32_PAT:
-               current_evmcs->host_ia32_pat = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_EFER:
-               current_evmcs->host_ia32_efer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR0:
-               current_evmcs->host_cr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR3:
-               current_evmcs->host_cr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CR4:
-               current_evmcs->host_cr4 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_SYSENTER_ESP:
-               current_evmcs->host_ia32_sysenter_esp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_IA32_SYSENTER_EIP:
-               current_evmcs->host_ia32_sysenter_eip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_RIP:
-               current_evmcs->host_rip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case IO_BITMAP_A:
-               current_evmcs->io_bitmap_a = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
-               break;
-       case IO_BITMAP_B:
-               current_evmcs->io_bitmap_b = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
-               break;
-       case MSR_BITMAP:
-               current_evmcs->msr_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-               break;
-       case GUEST_ES_BASE:
-               current_evmcs->guest_es_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_BASE:
-               current_evmcs->guest_cs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_BASE:
-               current_evmcs->guest_ss_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_BASE:
-               current_evmcs->guest_ds_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_BASE:
-               current_evmcs->guest_fs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_BASE:
-               current_evmcs->guest_gs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_BASE:
-               current_evmcs->guest_ldtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_BASE:
-               current_evmcs->guest_tr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GDTR_BASE:
-               current_evmcs->guest_gdtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_IDTR_BASE:
-               current_evmcs->guest_idtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case TSC_OFFSET:
-               current_evmcs->tsc_offset = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case VIRTUAL_APIC_PAGE_ADDR:
-               current_evmcs->virtual_apic_page_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case VMCS_LINK_POINTER:
-               current_evmcs->vmcs_link_pointer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_DEBUGCTL:
-               current_evmcs->guest_ia32_debugctl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_PAT:
-               current_evmcs->guest_ia32_pat = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_IA32_EFER:
-               current_evmcs->guest_ia32_efer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR0:
-               current_evmcs->guest_pdptr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR1:
-               current_evmcs->guest_pdptr1 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR2:
-               current_evmcs->guest_pdptr2 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PDPTR3:
-               current_evmcs->guest_pdptr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_PENDING_DBG_EXCEPTIONS:
-               current_evmcs->guest_pending_dbg_exceptions = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_ESP:
-               current_evmcs->guest_sysenter_esp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_EIP:
-               current_evmcs->guest_sysenter_eip = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case CR0_GUEST_HOST_MASK:
-               current_evmcs->cr0_guest_host_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR4_GUEST_HOST_MASK:
-               current_evmcs->cr4_guest_host_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR0_READ_SHADOW:
-               current_evmcs->cr0_read_shadow = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case CR4_READ_SHADOW:
-               current_evmcs->cr4_read_shadow = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR0:
-               current_evmcs->guest_cr0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR3:
-               current_evmcs->guest_cr3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_CR4:
-               current_evmcs->guest_cr4 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case GUEST_DR7:
-               current_evmcs->guest_dr7 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
-               break;
-       case HOST_FS_BASE:
-               current_evmcs->host_fs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_GS_BASE:
-               current_evmcs->host_gs_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_TR_BASE:
-               current_evmcs->host_tr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_GDTR_BASE:
-               current_evmcs->host_gdtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_IDTR_BASE:
-               current_evmcs->host_idtr_base = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case HOST_RSP:
-               current_evmcs->host_rsp = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-               break;
-       case EPT_POINTER:
-               current_evmcs->ept_pointer = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
-               break;
-       case GUEST_BNDCFGS:
-               current_evmcs->guest_bndcfgs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case XSS_EXIT_BITMAP:
-               current_evmcs->xss_exit_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case GUEST_PHYSICAL_ADDRESS:
-               current_evmcs->guest_physical_address = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case EXIT_QUALIFICATION:
-               current_evmcs->exit_qualification = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_LINEAR_ADDRESS:
-               current_evmcs->guest_linear_address = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_MSR_STORE_ADDR:
-               current_evmcs->vm_exit_msr_store_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_LOAD_ADDR:
-               current_evmcs->vm_exit_msr_load_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_ENTRY_MSR_LOAD_ADDR:
-               current_evmcs->vm_entry_msr_load_addr = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE0:
-               current_evmcs->cr3_target_value0 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE1:
-               current_evmcs->cr3_target_value1 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE2:
-               current_evmcs->cr3_target_value2 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_VALUE3:
-               current_evmcs->cr3_target_value3 = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case TPR_THRESHOLD:
-               current_evmcs->tpr_threshold = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case GUEST_INTERRUPTIBILITY_INFO:
-               current_evmcs->guest_interruptibility_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
-               break;
-       case CPU_BASED_VM_EXEC_CONTROL:
-               current_evmcs->cpu_based_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
-               break;
-       case EXCEPTION_BITMAP:
-               current_evmcs->exception_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
-               break;
-       case VM_ENTRY_CONTROLS:
-               current_evmcs->vm_entry_controls = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
-               break;
-       case VM_ENTRY_INTR_INFO_FIELD:
-               current_evmcs->vm_entry_intr_info_field = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case VM_ENTRY_EXCEPTION_ERROR_CODE:
-               current_evmcs->vm_entry_exception_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case VM_ENTRY_INSTRUCTION_LEN:
-               current_evmcs->vm_entry_instruction_len = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
-               break;
-       case HOST_IA32_SYSENTER_CS:
-               current_evmcs->host_ia32_sysenter_cs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case PIN_BASED_VM_EXEC_CONTROL:
-               current_evmcs->pin_based_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case VM_EXIT_CONTROLS:
-               current_evmcs->vm_exit_controls = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case SECONDARY_VM_EXEC_CONTROL:
-               current_evmcs->secondary_vm_exec_control = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
-               break;
-       case GUEST_ES_LIMIT:
-               current_evmcs->guest_es_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_LIMIT:
-               current_evmcs->guest_cs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_LIMIT:
-               current_evmcs->guest_ss_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_LIMIT:
-               current_evmcs->guest_ds_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_LIMIT:
-               current_evmcs->guest_fs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_LIMIT:
-               current_evmcs->guest_gs_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_LIMIT:
-               current_evmcs->guest_ldtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_LIMIT:
-               current_evmcs->guest_tr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GDTR_LIMIT:
-               current_evmcs->guest_gdtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_IDTR_LIMIT:
-               current_evmcs->guest_idtr_limit = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_ES_AR_BYTES:
-               current_evmcs->guest_es_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_AR_BYTES:
-               current_evmcs->guest_cs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_AR_BYTES:
-               current_evmcs->guest_ss_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_AR_BYTES:
-               current_evmcs->guest_ds_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_AR_BYTES:
-               current_evmcs->guest_fs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_AR_BYTES:
-               current_evmcs->guest_gs_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_AR_BYTES:
-               current_evmcs->guest_ldtr_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_AR_BYTES:
-               current_evmcs->guest_tr_ar_bytes = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_ACTIVITY_STATE:
-               current_evmcs->guest_activity_state = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case GUEST_SYSENTER_CS:
-               current_evmcs->guest_sysenter_cs = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case VM_INSTRUCTION_ERROR:
-               current_evmcs->vm_instruction_error = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_REASON:
-               current_evmcs->vm_exit_reason = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INTR_INFO:
-               current_evmcs->vm_exit_intr_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INTR_ERROR_CODE:
-               current_evmcs->vm_exit_intr_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case IDT_VECTORING_INFO_FIELD:
-               current_evmcs->idt_vectoring_info_field = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case IDT_VECTORING_ERROR_CODE:
-               current_evmcs->idt_vectoring_error_code = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VM_EXIT_INSTRUCTION_LEN:
-               current_evmcs->vm_exit_instruction_len = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case VMX_INSTRUCTION_INFO:
-               current_evmcs->vmx_instruction_info = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MASK:
-               current_evmcs->page_fault_error_code_mask = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case PAGE_FAULT_ERROR_CODE_MATCH:
-               current_evmcs->page_fault_error_code_match = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case CR3_TARGET_COUNT:
-               current_evmcs->cr3_target_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_STORE_COUNT:
-               current_evmcs->vm_exit_msr_store_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_EXIT_MSR_LOAD_COUNT:
-               current_evmcs->vm_exit_msr_load_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case VM_ENTRY_MSR_LOAD_COUNT:
-               current_evmcs->vm_entry_msr_load_count = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-               break;
-       case HOST_ES_SELECTOR:
-               current_evmcs->host_es_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_CS_SELECTOR:
-               current_evmcs->host_cs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_SS_SELECTOR:
-               current_evmcs->host_ss_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_DS_SELECTOR:
-               current_evmcs->host_ds_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_FS_SELECTOR:
-               current_evmcs->host_fs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_GS_SELECTOR:
-               current_evmcs->host_gs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case HOST_TR_SELECTOR:
-               current_evmcs->host_tr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case GUEST_ES_SELECTOR:
-               current_evmcs->guest_es_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_CS_SELECTOR:
-               current_evmcs->guest_cs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_SS_SELECTOR:
-               current_evmcs->guest_ss_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_DS_SELECTOR:
-               current_evmcs->guest_ds_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_FS_SELECTOR:
-               current_evmcs->guest_fs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_GS_SELECTOR:
-               current_evmcs->guest_gs_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_LDTR_SELECTOR:
-               current_evmcs->guest_ldtr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case GUEST_TR_SELECTOR:
-               current_evmcs->guest_tr_selector = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
-               break;
-       case VIRTUAL_PROCESSOR_ID:
-               current_evmcs->virtual_processor_id = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
-               break;
-       case HOST_IA32_PERF_GLOBAL_CTRL:
-               current_evmcs->host_ia32_perf_global_ctrl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-               break;
-       case GUEST_IA32_PERF_GLOBAL_CTRL:
-               current_evmcs->guest_ia32_perf_global_ctrl = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
-               break;
-       case ENCLS_EXITING_BITMAP:
-               current_evmcs->encls_exiting_bitmap = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       case TSC_MULTIPLIER:
-               current_evmcs->tsc_multiplier = value;
-               current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
-               break;
-       default: return 1;
-       }
-
-       return 0;
-}
-
-static inline int evmcs_vmlaunch(void)
-{
-       int ret;
-
-       current_evmcs->hv_clean_fields = 0;
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "mov %%rsp, (%[host_rsp]);"
-                            "lea 1f(%%rip), %%rax;"
-                            "mov %%rax, (%[host_rip]);"
-                            "vmlaunch;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"
-                              ((uint64_t)&current_evmcs->host_rsp),
-                              [host_rip]"r"
-                              ((uint64_t)&current_evmcs->host_rip)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int evmcs_vmresume(void)
-{
-       int ret;
-
-       /* HOST_RIP */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
-       /* HOST_RSP */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "mov %%rsp, (%[host_rsp]);"
-                            "lea 1f(%%rip), %%rax;"
-                            "mov %%rax, (%[host_rip]);"
-                            "vmresume;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"
-                              ((uint64_t)&current_evmcs->host_rsp),
-                              [host_rip]"r"
-                              ((uint64_t)&current_evmcs->host_rip)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-#endif /* !SELFTEST_KVM_EVMCS_H */
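For context on how the launch/resume helpers above are exercised, here is a minimal, hypothetical guest-side sketch. Only evmcs_vmlaunch(), evmcs_vmresume() and current_evmcs come from this header; GUEST_ASSERT() and EXIT_REASON_VMCALL are assumed to be provided by the selftests' ucall and vmx headers, and the sketch assumes L2's first action is a VMCALL.

static void l1_evmcs_sketch(void)
{
	/*
	 * evmcs_vmlaunch() zeroes hv_clean_fields and returns the 0 it pushed
	 * on the stack once the first VM-exit lands at the "1:" label via
	 * host_rip; a failed VMLAUNCH falls through to the incq and returns 1.
	 */
	GUEST_ASSERT(!evmcs_vmlaunch());

	/* Exit information is read straight out of the enlightened VMCS. */
	GUEST_ASSERT(current_evmcs->vm_exit_reason == EXIT_REASON_VMCALL);

	/*
	 * Re-entries use evmcs_vmresume(), which re-dirties the HOST_RIP and
	 * HOST_RSP clean-field groups before resuming.
	 */
	GUEST_ASSERT(!evmcs_vmresume());
}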
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
deleted file mode 100644 (file)
index 6849e25..0000000
+++ /dev/null
@@ -1,364 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/hyperv.h
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- */
-
-#ifndef SELFTEST_KVM_HYPERV_H
-#define SELFTEST_KVM_HYPERV_H
-
-#include "processor.h"
-
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS  0x40000000
-#define HYPERV_CPUID_INTERFACE                 0x40000001
-#define HYPERV_CPUID_VERSION                   0x40000002
-#define HYPERV_CPUID_FEATURES                  0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO          0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS          0x40000005
-#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES   0x40000007
-#define HYPERV_CPUID_NESTED_FEATURES           0x4000000A
-#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS   0x40000080
-#define HYPERV_CPUID_SYNDBG_INTERFACE                  0x40000081
-#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES      0x40000082
-
-#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
-#define HV_X64_MSR_HYPERCALL                   0x40000001
-#define HV_X64_MSR_VP_INDEX                    0x40000002
-#define HV_X64_MSR_RESET                       0x40000003
-#define HV_X64_MSR_VP_RUNTIME                  0x40000010
-#define HV_X64_MSR_TIME_REF_COUNT              0x40000020
-#define HV_X64_MSR_REFERENCE_TSC               0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY               0x40000022
-#define HV_X64_MSR_APIC_FREQUENCY              0x40000023
-#define HV_X64_MSR_EOI                         0x40000070
-#define HV_X64_MSR_ICR                         0x40000071
-#define HV_X64_MSR_TPR                         0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
-#define HV_X64_MSR_SCONTROL                    0x40000080
-#define HV_X64_MSR_SVERSION                    0x40000081
-#define HV_X64_MSR_SIEFP                       0x40000082
-#define HV_X64_MSR_SIMP                                0x40000083
-#define HV_X64_MSR_EOM                         0x40000084
-#define HV_X64_MSR_SINT0                       0x40000090
-#define HV_X64_MSR_SINT1                       0x40000091
-#define HV_X64_MSR_SINT2                       0x40000092
-#define HV_X64_MSR_SINT3                       0x40000093
-#define HV_X64_MSR_SINT4                       0x40000094
-#define HV_X64_MSR_SINT5                       0x40000095
-#define HV_X64_MSR_SINT6                       0x40000096
-#define HV_X64_MSR_SINT7                       0x40000097
-#define HV_X64_MSR_SINT8                       0x40000098
-#define HV_X64_MSR_SINT9                       0x40000099
-#define HV_X64_MSR_SINT10                      0x4000009A
-#define HV_X64_MSR_SINT11                      0x4000009B
-#define HV_X64_MSR_SINT12                      0x4000009C
-#define HV_X64_MSR_SINT13                      0x4000009D
-#define HV_X64_MSR_SINT14                      0x4000009E
-#define HV_X64_MSR_SINT15                      0x4000009F
-#define HV_X64_MSR_STIMER0_CONFIG              0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT               0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG              0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT               0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG              0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT               0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT               0x400000B7
-#define HV_X64_MSR_GUEST_IDLE                  0x400000F0
-#define HV_X64_MSR_CRASH_P0                    0x40000100
-#define HV_X64_MSR_CRASH_P1                    0x40000101
-#define HV_X64_MSR_CRASH_P2                    0x40000102
-#define HV_X64_MSR_CRASH_P3                    0x40000103
-#define HV_X64_MSR_CRASH_P4                    0x40000104
-#define HV_X64_MSR_CRASH_CTL                   0x40000105
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS                0x40000108
-#define HV_X64_MSR_TSC_INVARIANT_CONTROL       0x40000118
-
-#define HV_X64_MSR_SYNDBG_CONTROL              0x400000F1
-#define HV_X64_MSR_SYNDBG_STATUS               0x400000F2
-#define HV_X64_MSR_SYNDBG_SEND_BUFFER          0x400000F3
-#define HV_X64_MSR_SYNDBG_RECV_BUFFER          0x400000F4
-#define HV_X64_MSR_SYNDBG_PENDING_BUFFER       0x400000F5
-#define HV_X64_MSR_SYNDBG_OPTIONS              0x400000FF
-
-/* HYPERV_CPUID_FEATURES.EAX */
-#define HV_MSR_VP_RUNTIME_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
-#define HV_MSR_SYNIC_AVAILABLE                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
-#define HV_MSR_SYNTIMER_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
-#define HV_MSR_APIC_ACCESS_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
-#define HV_MSR_HYPERCALL_AVAILABLE             \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
-#define HV_MSR_VP_INDEX_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
-#define HV_MSR_RESET_AVAILABLE                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
-#define HV_MSR_STAT_PAGES_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
-#define HV_MSR_REFERENCE_TSC_AVAILABLE         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
-#define HV_MSR_GUEST_IDLE_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
-#define HV_ACCESS_FREQUENCY_MSRS               \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
-#define HV_ACCESS_REENLIGHTENMENT              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
-#define HV_ACCESS_TSC_INVARIANT                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
-
-/* HYPERV_CPUID_FEATURES.EBX */
-#define HV_CREATE_PARTITIONS                   \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
-#define HV_ACCESS_PARTITION_ID                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
-#define HV_ACCESS_MEMORY_POOL                  \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
-#define HV_ADJUST_MESSAGE_BUFFERS              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
-#define HV_POST_MESSAGES                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
-#define HV_SIGNAL_EVENTS                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
-#define HV_CREATE_PORT                         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
-#define HV_CONNECT_PORT                                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
-#define HV_ACCESS_STATS                                \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
-#define HV_DEBUGGING                           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
-#define HV_CPU_MANAGEMENT                      \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
-#define HV_ENABLE_EXTENDED_HYPERCALLS          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
-#define HV_ISOLATION                           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
-
-/* HYPERV_CPUID_FEATURES.EDX */
-#define HV_X64_MWAIT_AVAILABLE                         \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE               \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
-#define HV_X64_PERF_MONITOR_AVAILABLE                  \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
-#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
-#define HV_FEATURE_DEBUG_MSRS_AVAILABLE                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
-#define HV_STIMER_DIRECT_MODE_AVAILABLE                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
-
-/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
-#define HV_X64_AS_SWITCH_RECOMMENDED                   \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED             \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
-#define HV_X64_APIC_ACCESS_RECOMMENDED                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
-#define HV_X64_SYSTEM_RESET_RECOMMENDED                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
-#define HV_X64_RELAXED_TIMING_RECOMMENDED              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
-#define HV_DEPRECATING_AEOI_RECOMMENDED                        \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
-#define HV_X64_CLUSTER_IPI_RECOMMENDED                 \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EAX */
-#define HV_X64_NESTED_DIRECT_FLUSH                     \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17)
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH              \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18)
-#define HV_X64_NESTED_MSR_BITMAP                       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19)
-
-/* HYPERV_CPUID_NESTED_FEATURES.EBX */
-#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL          \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0)
-
-/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
-#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING       \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
-
-/* Hypercalls */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE     0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST      0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT           0x0008
-#define HVCALL_SEND_IPI                                0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
-#define HVCALL_SEND_IPI_EX                     0x0015
-#define HVCALL_GET_PARTITION_ID                        0x0046
-#define HVCALL_DEPOSIT_MEMORY                  0x0048
-#define HVCALL_CREATE_VP                       0x004e
-#define HVCALL_GET_VP_REGISTERS                        0x0050
-#define HVCALL_SET_VP_REGISTERS                        0x0051
-#define HVCALL_POST_MESSAGE                    0x005c
-#define HVCALL_SIGNAL_EVENT                    0x005d
-#define HVCALL_POST_DEBUG_DATA                 0x0069
-#define HVCALL_RETRIEVE_DEBUG_DATA             0x006a
-#define HVCALL_RESET_DEBUG_SESSION             0x006b
-#define HVCALL_ADD_LOGICAL_PROCESSOR           0x0076
-#define HVCALL_MAP_DEVICE_INTERRUPT            0x007c
-#define HVCALL_UNMAP_DEVICE_INTERRUPT          0x007d
-#define HVCALL_RETARGET_INTERRUPT              0x007e
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
-/* Extended hypercalls */
-#define HV_EXT_CALL_QUERY_CAPABILITIES         0x8001
-
-#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS                      0
-#define HV_STATUS_INVALID_HYPERCALL_CODE       2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT      3
-#define HV_STATUS_INVALID_ALIGNMENT            4
-#define HV_STATUS_INVALID_PARAMETER            5
-#define HV_STATUS_ACCESS_DENIED                        6
-#define HV_STATUS_OPERATION_DENIED             8
-#define HV_STATUS_INSUFFICIENT_MEMORY          11
-#define HV_STATUS_INVALID_PORT_ID              17
-#define HV_STATUS_INVALID_CONNECTION_ID                18
-#define HV_STATUS_INSUFFICIENT_BUFFERS         19
-
-/* hypercall options */
-#define HV_HYPERCALL_FAST_BIT          BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET    17
-#define HV_HYPERCALL_REP_COMP_OFFSET   32
-
-/*
- * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
- * is set to the hypercall status (if no exception occurred).
- */
-static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
-                                        vm_vaddr_t output_address,
-                                        uint64_t *hv_status)
-{
-       uint64_t error_code;
-       uint8_t vector;
-
-       /* Note both the hypercall and the "asm safe" clobber r9-r11. */
-       asm volatile("mov %[output_address], %%r8\n\t"
-                    KVM_ASM_SAFE("vmcall")
-                    : "=a" (*hv_status),
-                      "+c" (control), "+d" (input_address),
-                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)
-                    : [output_address] "r"(output_address),
-                      "a" (-EFAULT)
-                    : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
-       return vector;
-}
-
-/* Issue a Hyper-V hypercall and assert that it succeeded. */
-static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
-                                   vm_vaddr_t output_address)
-{
-       uint64_t hv_status;
-       uint8_t vector;
-
-       vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
-
-       GUEST_ASSERT(!vector);
-       GUEST_ASSERT((hv_status & 0xffff) == 0);
-}
-
-/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
-static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
-{
-       int i;
-
-       for (i = 0; i < n_sse_regs; i++)
-               write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
-}
-
-/* Proper HV_X64_MSR_GUEST_OS_ID value */
-#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
-
-#define HV_X64_MSR_VP_ASSIST_PAGE              0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE       0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT        12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
-               (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-struct hv_nested_enlightenments_control {
-       struct {
-               __u32 directhypercall:1;
-               __u32 reserved:31;
-       } features;
-       struct {
-               __u32 reserved;
-       } hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
-       __u32 apic_assist;
-       __u32 reserved1;
-       __u64 vtl_control[3];
-       struct hv_nested_enlightenments_control nested_control;
-       __u8 enlighten_vmentry;
-       __u8 reserved2[7];
-       __u64 current_nested_vmcs;
-} __packed;
-
-extern struct hv_vp_assist_page *current_vp_assist;
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
-
-struct hyperv_test_pages {
-       /* VP assist page */
-       void *vp_assist_hva;
-       uint64_t vp_assist_gpa;
-       void *vp_assist;
-
-       /* Partition assist page */
-       void *partition_assist_hva;
-       uint64_t partition_assist_gpa;
-       void *partition_assist;
-
-       /* Enlightened VMCS */
-       void *enlightened_vmcs_hva;
-       uint64_t enlightened_vmcs_gpa;
-       void *enlightened_vmcs;
-};
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
-                                                      vm_vaddr_t *p_hv_pages_gva);
-
-/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
-#define HV_INVARIANT_TSC_EXPOSED               BIT_ULL(0)
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature);
-
-#endif /* !SELFTEST_KVM_HYPERV_H */
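As a rough illustration of how the hypercall helpers and the HVCALL_*/HV_X64_MSR_* definitions above fit together, a hypothetical guest-side sketch follows. The flush_input_gpa argument is an assumption (a guest page the test pre-populated with the hypercall input); wrmsr() is assumed to come from the selftests' processor.h.

static void guest_hv_sketch(vm_vaddr_t flush_input_gpa)
{
	/*
	 * Setting the guest OS ID is the usual first step before a VMCALL is
	 * treated as a Hyper-V hypercall.
	 */
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);

	/*
	 * Slow (memory-based) hypercall: the control word carries only the
	 * call code, while flags such as HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES
	 * live in the input page at flush_input_gpa.  hyperv_hypercall()
	 * asserts that no exception was raised and that hv_status reports
	 * HV_STATUS_SUCCESS.
	 */
	hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, flush_input_gpa, 0);
}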
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
deleted file mode 100644 (file)
index 972bb1c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UTIL_ARCH_H
-#define SELFTEST_KVM_UTIL_ARCH_H
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "kvm_util_types.h"
-#include "test_util.h"
-
-extern bool is_forced_emulation_enabled;
-
-struct kvm_vm_arch {
-       vm_vaddr_t gdt;
-       vm_vaddr_t tss;
-       vm_vaddr_t idt;
-
-       uint64_t c_bit;
-       uint64_t s_bit;
-       int sev_fd;
-       bool is_pt_protected;
-};
-
-static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
-{
-       return arch->c_bit || arch->s_bit;
-}
-
-#define vm_arch_has_protected_memory(vm) \
-       __vm_arch_has_protected_memory(&(vm)->arch)
-
-#define vcpu_arch_put_guest(mem, __val)                                                        \
-do {                                                                                   \
-       const typeof(mem) val = (__val);                                                \
-                                                                                       \
-       if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) {            \
-               (mem) = val;                                                            \
-       } else if (guest_random_bool(&guest_rng)) {                                     \
-               __asm__ __volatile__(KVM_FEP "mov %1, %0"                               \
-                                    : "+m" (mem)                                       \
-                                    : "r" (val) : "memory");                           \
-       } else {                                                                        \
-               uint64_t __old = READ_ONCE(mem);                                        \
-                                                                                       \
-               __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]"       \
-                                    : [ptr] "+m" (mem), [old] "+a" (__old)             \
-                                    : [new]"r" (val) : "memory", "cc");                \
-       }                                                                               \
-} while (0)
-
-#endif  // SELFTEST_KVM_UTIL_ARCH_H
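To illustrate vcpu_arch_put_guest() above, a hypothetical guest-side store might look like the sketch below; guest_data and guest_write_data() are illustrative names, not part of the header.

/*
 * Hypothetical sketch: stores issued this way go through a plain assignment,
 * a KVM_FEP-prefixed MOV, or a KVM_FEP-prefixed CMPXCHG, chosen at random
 * when forced emulation is enabled.
 */
static uint64_t guest_data;

static void guest_write_data(uint64_t value)
{
	vcpu_arch_put_guest(guest_data, value);
}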
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h
deleted file mode 100644 (file)
index 6119321..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/mce.h
- *
- * Copyright (C) 2022, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_MCE_H
-#define SELFTEST_KVM_MCE_H
-
-#define MCG_CTL_P              BIT_ULL(8)   /* MCG_CTL register available */
-#define MCG_SER_P              BIT_ULL(24)  /* MCA recovery/new status bits */
-#define MCG_LMCE_P             BIT_ULL(27)  /* Local machine check supported */
-#define MCG_CMCI_P             BIT_ULL(10)  /* CMCI supported */
-#define KVM_MAX_MCE_BANKS 32
-#define MCG_CAP_BANKS_MASK 0xff       /* Bit 0-7 of the MCG_CAP register are #banks */
-#define MCI_STATUS_VAL (1ULL << 63)   /* valid error */
-#define MCI_STATUS_UC (1ULL << 61)    /* uncorrected error */
-#define MCI_STATUS_EN (1ULL << 60)    /* error enabled */
-#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
-#define MCM_ADDR_PHYS 2    /* physical address */
-#define MCI_CTL2_CMCI_EN               BIT_ULL(30)
-
-#endif /* SELFTEST_KVM_MCE_H */
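A small, hypothetical sketch of how these bits are typically combined; MSR_IA32_MCG_CAP, rdmsr() and GUEST_ASSERT() are assumed to come from msr-index.h and the selftests' common headers.

static void guest_mce_sketch(void)
{
	/* Bits 0-7 of IA32_MCG_CAP give the number of reporting banks. */
	uint64_t mcg_cap = rdmsr(MSR_IA32_MCG_CAP);
	int nr_banks = mcg_cap & MCG_CAP_BANKS_MASK;

	/* A synthetic "valid, enabled, uncorrected" status for injection tests. */
	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_UC;

	GUEST_ASSERT(nr_banks <= KVM_MAX_MCE_BANKS);
	(void)status;
}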
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h
deleted file mode 100644 (file)
index 3c10c4d..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#ifndef SELFTEST_KVM_PMU_H
-#define SELFTEST_KVM_PMU_H
-
-#include <stdint.h>
-
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
-
-/*
- * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
- * technically AMD's format, as Intel's format only supports 8 bits for the
- * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
- * in '0' is a nop and won't clobber the CMASK.
- */
-#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
-                                   ((eventsel) & 0xff) |               \
-                                   ((umask) & 0xff) << 8)
-
-/*
- * These are technically Intel's definitions, but except for CMASK (see above),
- * AMD's layout is compatible with Intel's.
- */
-#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
-#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
-#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
-#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
-#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
-#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
-#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
-#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
-#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
-#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
-#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
-
-/* RDPMC control flags, Intel only. */
-#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
-#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
-#define INTEL_RDPMC_FAST                       BIT_ULL(31)
-
-/* Fixed PMC controls, Intel only. */
-#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
-
-#define FIXED_PMC_KERNEL                       BIT_ULL(0)
-#define FIXED_PMC_USER                         BIT_ULL(1)
-#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
-#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
-#define FIXED_PMC_NR_BITS                      4
-#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
-
-#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
-#define PMU_CAP_LBR_FMT                                0x3f
-
-#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
-#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
-#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
-#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
-#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
-#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
-#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
-#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
-
-#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
-#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
-#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
-#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
-
-/*
- * Note!  The order and thus the index of the architectural events matters as
- * support for each event is enumerated via CPUID using the index of the event.
- */
-enum intel_pmu_architectural_events {
-       INTEL_ARCH_CPU_CYCLES_INDEX,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
-       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
-       INTEL_ARCH_LLC_REFERENCES_INDEX,
-       INTEL_ARCH_LLC_MISSES_INDEX,
-       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
-       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
-       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
-       NR_INTEL_ARCH_EVENTS,
-};
-
-enum amd_pmu_zen_events {
-       AMD_ZEN_CORE_CYCLES_INDEX,
-       AMD_ZEN_INSTRUCTIONS_INDEX,
-       AMD_ZEN_BRANCHES_INDEX,
-       AMD_ZEN_BRANCH_MISSES_INDEX,
-       NR_AMD_ZEN_EVENTS,
-};
-
-extern const uint64_t intel_pmu_arch_events[];
-extern const uint64_t amd_pmu_zen_events[];
-
-#endif /* SELFTEST_KVM_PMU_H */
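The RAW_EVENT() comment above describes the event-select encoding; below is a hypothetical guest-side sketch of programming a general-purpose counter with it. MSR_P6_EVNTSEL0/MSR_P6_PERFCTR0 and wrmsr()/rdmsr() are assumed to come from msr-index.h and the selftests' processor.h, and on PMU version 2+ the test would also need to enable the counter in MSR_CORE_PERF_GLOBAL_CTRL.

static uint64_t count_retired_instructions(void)
{
	/* Count retired instructions in user and kernel mode on GP counter 0. */
	wrmsr(MSR_P6_PERFCTR0, 0);
	wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
			       ARCH_PERFMON_EVENTSEL_OS |
			       ARCH_PERFMON_EVENTSEL_USR |
			       INTEL_ARCH_INSTRUCTIONS_RETIRED);

	return rdmsr(MSR_P6_PERFCTR0);
}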
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
deleted file mode 100644 (file)
index 645200e..0000000
+++ /dev/null
@@ -1,1397 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_PROCESSOR_H
-#define SELFTEST_KVM_PROCESSOR_H
-
-#include <assert.h>
-#include <stdint.h>
-#include <syscall.h>
-
-#include <asm/msr-index.h>
-#include <asm/prctl.h>
-
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-
-#include "kvm_util.h"
-#include "ucall_common.h"
-
-extern bool host_cpu_is_intel;
-extern bool host_cpu_is_amd;
-extern uint64_t guest_tsc_khz;
-
-#ifndef MAX_NR_CPUID_ENTRIES
-#define MAX_NR_CPUID_ENTRIES 100
-#endif
-
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-
-#define NMI_VECTOR             0x02
-
-#define X86_EFLAGS_FIXED        (1u << 1)
-
-#define X86_CR4_VME            (1ul << 0)
-#define X86_CR4_PVI            (1ul << 1)
-#define X86_CR4_TSD            (1ul << 2)
-#define X86_CR4_DE             (1ul << 3)
-#define X86_CR4_PSE            (1ul << 4)
-#define X86_CR4_PAE            (1ul << 5)
-#define X86_CR4_MCE            (1ul << 6)
-#define X86_CR4_PGE            (1ul << 7)
-#define X86_CR4_PCE            (1ul << 8)
-#define X86_CR4_OSFXSR         (1ul << 9)
-#define X86_CR4_OSXMMEXCPT     (1ul << 10)
-#define X86_CR4_UMIP           (1ul << 11)
-#define X86_CR4_LA57           (1ul << 12)
-#define X86_CR4_VMXE           (1ul << 13)
-#define X86_CR4_SMXE           (1ul << 14)
-#define X86_CR4_FSGSBASE       (1ul << 16)
-#define X86_CR4_PCIDE          (1ul << 17)
-#define X86_CR4_OSXSAVE                (1ul << 18)
-#define X86_CR4_SMEP           (1ul << 20)
-#define X86_CR4_SMAP           (1ul << 21)
-#define X86_CR4_PKE            (1ul << 22)
-
-struct xstate_header {
-       u64                             xstate_bv;
-       u64                             xcomp_bv;
-       u64                             reserved[6];
-} __attribute__((packed));
-
-struct xstate {
-       u8                              i387[512];
-       struct xstate_header            header;
-       u8                              extended_state_area[0];
-} __attribute__ ((packed, aligned (64)));
-
-#define XFEATURE_MASK_FP               BIT_ULL(0)
-#define XFEATURE_MASK_SSE              BIT_ULL(1)
-#define XFEATURE_MASK_YMM              BIT_ULL(2)
-#define XFEATURE_MASK_BNDREGS          BIT_ULL(3)
-#define XFEATURE_MASK_BNDCSR           BIT_ULL(4)
-#define XFEATURE_MASK_OPMASK           BIT_ULL(5)
-#define XFEATURE_MASK_ZMM_Hi256                BIT_ULL(6)
-#define XFEATURE_MASK_Hi16_ZMM         BIT_ULL(7)
-#define XFEATURE_MASK_PT               BIT_ULL(8)
-#define XFEATURE_MASK_PKRU             BIT_ULL(9)
-#define XFEATURE_MASK_PASID            BIT_ULL(10)
-#define XFEATURE_MASK_CET_USER         BIT_ULL(11)
-#define XFEATURE_MASK_CET_KERNEL       BIT_ULL(12)
-#define XFEATURE_MASK_LBR              BIT_ULL(15)
-#define XFEATURE_MASK_XTILE_CFG                BIT_ULL(17)
-#define XFEATURE_MASK_XTILE_DATA       BIT_ULL(18)
-
-#define XFEATURE_MASK_AVX512           (XFEATURE_MASK_OPMASK | \
-                                        XFEATURE_MASK_ZMM_Hi256 | \
-                                        XFEATURE_MASK_Hi16_ZMM)
-#define XFEATURE_MASK_XTILE            (XFEATURE_MASK_XTILE_DATA | \
-                                        XFEATURE_MASK_XTILE_CFG)
-
-/* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
-enum cpuid_output_regs {
-       KVM_CPUID_EAX,
-       KVM_CPUID_EBX,
-       KVM_CPUID_ECX,
-       KVM_CPUID_EDX
-};
-
-/*
- * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
- * passed by value with no overhead.
- */
-struct kvm_x86_cpu_feature {
-       u32     function;
-       u16     index;
-       u8      reg;
-       u8      bit;
-};
-#define        KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)                                \
-({                                                                             \
-       struct kvm_x86_cpu_feature feature = {                                  \
-               .function = fn,                                                 \
-               .index = idx,                                                   \
-               .reg = KVM_CPUID_##gpr,                                         \
-               .bit = __bit,                                                   \
-       };                                                                      \
-                                                                               \
-       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
-                         (fn & 0xc0000000) == 0x40000000 ||                    \
-                         (fn & 0xc0000000) == 0x80000000 ||                    \
-                         (fn & 0xc0000000) == 0xc0000000);                     \
-       kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));    \
-       feature;                                                                \
-})
-
-/*
- * Basic Leafs, a.k.a. Intel defined
- */
-#define        X86_FEATURE_MWAIT               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
-#define        X86_FEATURE_VMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
-#define        X86_FEATURE_SMX                 KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
-#define        X86_FEATURE_PDCM                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
-#define        X86_FEATURE_PCID                KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
-#define X86_FEATURE_X2APIC             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
-#define        X86_FEATURE_MOVBE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
-#define        X86_FEATURE_TSC_DEADLINE_TIMER  KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
-#define        X86_FEATURE_XSAVE               KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
-#define        X86_FEATURE_OSXSAVE             KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
-#define        X86_FEATURE_RDRAND              KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
-#define        X86_FEATURE_HYPERVISOR          KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
-#define X86_FEATURE_PAE                        KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
-#define        X86_FEATURE_MCE                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
-#define        X86_FEATURE_APIC                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
-#define        X86_FEATURE_CLFLUSH             KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
-#define        X86_FEATURE_XMM                 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
-#define        X86_FEATURE_XMM2                KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
-#define        X86_FEATURE_FSGSBASE            KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
-#define        X86_FEATURE_TSC_ADJUST          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
-#define        X86_FEATURE_SGX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
-#define        X86_FEATURE_HLE                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
-#define        X86_FEATURE_SMEP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
-#define        X86_FEATURE_INVPCID             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
-#define        X86_FEATURE_RTM                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
-#define        X86_FEATURE_MPX                 KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
-#define        X86_FEATURE_SMAP                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
-#define        X86_FEATURE_PCOMMIT             KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
-#define        X86_FEATURE_CLFLUSHOPT          KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
-#define        X86_FEATURE_CLWB                KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
-#define        X86_FEATURE_UMIP                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
-#define        X86_FEATURE_PKU                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
-#define        X86_FEATURE_OSPKE               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
-#define        X86_FEATURE_LA57                KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
-#define        X86_FEATURE_RDPID               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
-#define        X86_FEATURE_SGX_LC              KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
-#define        X86_FEATURE_SHSTK               KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
-#define        X86_FEATURE_IBT                 KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
-#define        X86_FEATURE_AMX_TILE            KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
-#define        X86_FEATURE_SPEC_CTRL           KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
-#define        X86_FEATURE_ARCH_CAPABILITIES   KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
-#define        X86_FEATURE_PKS                 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
-#define        X86_FEATURE_XTILECFG            KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
-#define        X86_FEATURE_XTILEDATA           KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
-#define        X86_FEATURE_XSAVES              KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
-#define        X86_FEATURE_XFD                 KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
-#define X86_FEATURE_XTILEDATA_XFD      KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
-
-/*
- * Extended Leafs, a.k.a. AMD defined
- */
-#define        X86_FEATURE_SVM                 KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
-#define        X86_FEATURE_NX                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
-#define        X86_FEATURE_GBPAGES             KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
-#define        X86_FEATURE_RDTSCP              KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
-#define        X86_FEATURE_LM                  KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
-#define        X86_FEATURE_INVTSC              KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
-#define        X86_FEATURE_RDPRU               KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
-#define        X86_FEATURE_AMD_IBPB            KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
-#define        X86_FEATURE_NPT                 KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
-#define        X86_FEATURE_LBRV                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
-#define        X86_FEATURE_NRIPS               KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
-#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
-#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
-#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
-#define        X86_FEATURE_VGIF                KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
-#define X86_FEATURE_SEV                        KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
-#define X86_FEATURE_SEV_ES             KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
-
-/*
- * KVM defined paravirt features.
- */
-#define X86_FEATURE_KVM_CLOCKSOURCE    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
-#define X86_FEATURE_KVM_NOP_IO_DELAY   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
-#define X86_FEATURE_KVM_MMU_OP         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
-#define X86_FEATURE_KVM_CLOCKSOURCE2   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
-#define X86_FEATURE_KVM_ASYNC_PF       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
-#define X86_FEATURE_KVM_STEAL_TIME     KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
-#define X86_FEATURE_KVM_PV_EOI         KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
-#define X86_FEATURE_KVM_PV_UNHALT      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
-/* Bit 8 apparently isn't used?!?! */
-#define X86_FEATURE_KVM_PV_TLB_FLUSH   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
-#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
-#define X86_FEATURE_KVM_PV_SEND_IPI    KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
-#define X86_FEATURE_KVM_POLL_CONTROL   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
-#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
-#define X86_FEATURE_KVM_ASYNC_PF_INT   KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
-#define X86_FEATURE_KVM_MSI_EXT_DEST_ID        KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
-#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE       KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
-#define X86_FEATURE_KVM_MIGRATION_CONTROL      KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
-
-/*
- * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
- * value/property as opposed to a single-bit feature.  Again, pack the info
- * into a 64-bit value to pass by value with no overhead.
- */
-struct kvm_x86_cpu_property {
-       u32     function;
-       u8      index;
-       u8      reg;
-       u8      lo_bit;
-       u8      hi_bit;
-};
-#define        KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)                   \
-({                                                                             \
-       struct kvm_x86_cpu_property property = {                                \
-               .function = fn,                                                 \
-               .index = idx,                                                   \
-               .reg = KVM_CPUID_##gpr,                                         \
-               .lo_bit = low_bit,                                              \
-               .hi_bit = high_bit,                                             \
-       };                                                                      \
-                                                                               \
-       kvm_static_assert(low_bit < high_bit);                                  \
-       kvm_static_assert((fn & 0xc0000000) == 0 ||                             \
-                         (fn & 0xc0000000) == 0x40000000 ||                    \
-                         (fn & 0xc0000000) == 0x80000000 ||                    \
-                         (fn & 0xc0000000) == 0xc0000000);                     \
-       kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));   \
-       property;                                                               \
-})
-
-#define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
-#define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
-#define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
-#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
-#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
-#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
-#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
-
-#define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
-#define X86_PROPERTY_XSTATE_MAX_SIZE           KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
-#define X86_PROPERTY_SUPPORTED_XCR0_HI         KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)
-
-#define X86_PROPERTY_XSTATE_TILE_SIZE          KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
-#define X86_PROPERTY_XSTATE_TILE_OFFSET                KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
-#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES    KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
-#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES      KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
-#define X86_PROPERTY_AMX_BYTES_PER_TILE                KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
-#define X86_PROPERTY_AMX_BYTES_PER_ROW         KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
-#define X86_PROPERTY_AMX_NR_TILE_REGS          KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
-#define X86_PROPERTY_AMX_MAX_ROWS              KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)
-
-#define X86_PROPERTY_MAX_KVM_LEAF              KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
-
-#define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
-#define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
-#define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
-#define X86_PROPERTY_GUEST_MAX_PHY_ADDR                KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
-#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
-#define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
-
-#define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
-
-/*
- * Intel's architectural PMU events are bizarre.  They have a "feature" bit
- * that indicates the feature is _not_ supported, and a property that states
- * the length of the bit mask of unsupported features.  A feature is supported
- * if the size of the bit mask is larger than the "unavailable" bit, and said
- * bit is not set.  Fixed counters also have bizarre enumeration, but inverted
- * from the arch events for general purpose counters.  Fixed counters are
- * supported if a feature flag is set **OR** the total number of fixed counters
- * is greater than the index of the counter.
- *
- * Wrap the events for general purpose and fixed counters to simplify checking
- * whether or not a given architectural event is supported.
- */
-struct kvm_x86_pmu_feature {
-       struct kvm_x86_cpu_feature f;
-};
-#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
-({                                                                     \
-       struct kvm_x86_pmu_feature feature = {                          \
-               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
-       };                                                              \
-                                                                       \
-       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
-                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
-       feature;                                                        \
-})
-
-#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
-#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
-#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
-#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
-#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
-#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
-#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
-
-#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
-#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
-#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
-#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
-
-static inline unsigned int x86_family(unsigned int eax)
-{
-       unsigned int x86;
-
-       x86 = (eax >> 8) & 0xf;
-
-       if (x86 == 0xf)
-               x86 += (eax >> 20) & 0xff;
-
-       return x86;
-}
-
-static inline unsigned int x86_model(unsigned int eax)
-{
-       return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
-}
-
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK        BIT_ULL(0)
-#define PTE_WRITABLE_MASK       BIT_ULL(1)
-#define PTE_USER_MASK           BIT_ULL(2)
-#define PTE_ACCESSED_MASK       BIT_ULL(5)
-#define PTE_DIRTY_MASK          BIT_ULL(6)
-#define PTE_LARGE_MASK          BIT_ULL(7)
-#define PTE_GLOBAL_MASK         BIT_ULL(8)
-#define PTE_NX_MASK             BIT_ULL(63)
-
-#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
-
-#define PAGE_SHIFT             12
-#define PAGE_SIZE              (1ULL << PAGE_SHIFT)
-#define PAGE_MASK              (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
-
-#define HUGEPAGE_SHIFT(x)      (PAGE_SHIFT + (((x) - 1) * 9))
-#define HUGEPAGE_SIZE(x)       (1UL << HUGEPAGE_SHIFT(x))
-#define HUGEPAGE_MASK(x)       (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
-
-#define PTE_GET_PA(pte)                ((pte) & PHYSICAL_PAGE_MASK)
-#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
-
-/* General Registers in 64-Bit Mode */
-struct gpr64_regs {
-       u64 rax;
-       u64 rcx;
-       u64 rdx;
-       u64 rbx;
-       u64 rsp;
-       u64 rbp;
-       u64 rsi;
-       u64 rdi;
-       u64 r8;
-       u64 r9;
-       u64 r10;
-       u64 r11;
-       u64 r12;
-       u64 r13;
-       u64 r14;
-       u64 r15;
-};
-
-struct desc64 {
-       uint16_t limit0;
-       uint16_t base0;
-       unsigned base1:8, type:4, s:1, dpl:2, p:1;
-       unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
-       uint32_t base3;
-       uint32_t zero1;
-} __attribute__((packed));
-
-struct desc_ptr {
-       uint16_t size;
-       uint64_t address;
-} __attribute__((packed));
-
-struct kvm_x86_state {
-       struct kvm_xsave *xsave;
-       struct kvm_vcpu_events events;
-       struct kvm_mp_state mp_state;
-       struct kvm_regs regs;
-       struct kvm_xcrs xcrs;
-       struct kvm_sregs sregs;
-       struct kvm_debugregs debugregs;
-       union {
-               struct kvm_nested_state nested;
-               char nested_[16384];
-       };
-       struct kvm_msrs msrs;
-};
-
-static inline uint64_t get_desc64_base(const struct desc64 *desc)
-{
-       return ((uint64_t)desc->base3 << 32) |
-               (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
-}
-
-static inline uint64_t rdtsc(void)
-{
-       uint32_t eax, edx;
-       uint64_t tsc_val;
-       /*
-        * The lfence is to wait (on Intel CPUs) until all previous
-        * instructions have been executed. If software requires RDTSC to be
-        * executed prior to execution of any subsequent instruction, it can
-        * execute LFENCE immediately after RDTSC.
-        */
-       __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
-       tsc_val = ((uint64_t)edx) << 32 | eax;
-       return tsc_val;
-}
-
-static inline uint64_t rdtscp(uint32_t *aux)
-{
-       uint32_t eax, edx;
-
-       __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
-       return ((uint64_t)edx) << 32 | eax;
-}
-
-static inline uint64_t rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-static inline void wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-static inline uint16_t inw(uint16_t port)
-{
-       uint16_t tmp;
-
-       __asm__ __volatile__("in %%dx, %%ax"
-               : /* output */ "=a" (tmp)
-               : /* input */ "d" (port));
-
-       return tmp;
-}
-
-static inline uint16_t get_es(void)
-{
-       uint16_t es;
-
-       __asm__ __volatile__("mov %%es, %[es]"
-                            : /* output */ [es]"=rm"(es));
-       return es;
-}
-
-static inline uint16_t get_cs(void)
-{
-       uint16_t cs;
-
-       __asm__ __volatile__("mov %%cs, %[cs]"
-                            : /* output */ [cs]"=rm"(cs));
-       return cs;
-}
-
-static inline uint16_t get_ss(void)
-{
-       uint16_t ss;
-
-       __asm__ __volatile__("mov %%ss, %[ss]"
-                            : /* output */ [ss]"=rm"(ss));
-       return ss;
-}
-
-static inline uint16_t get_ds(void)
-{
-       uint16_t ds;
-
-       __asm__ __volatile__("mov %%ds, %[ds]"
-                            : /* output */ [ds]"=rm"(ds));
-       return ds;
-}
-
-static inline uint16_t get_fs(void)
-{
-       uint16_t fs;
-
-       __asm__ __volatile__("mov %%fs, %[fs]"
-                            : /* output */ [fs]"=rm"(fs));
-       return fs;
-}
-
-static inline uint16_t get_gs(void)
-{
-       uint16_t gs;
-
-       __asm__ __volatile__("mov %%gs, %[gs]"
-                            : /* output */ [gs]"=rm"(gs));
-       return gs;
-}
-
-static inline uint16_t get_tr(void)
-{
-       uint16_t tr;
-
-       __asm__ __volatile__("str %[tr]"
-                            : /* output */ [tr]"=rm"(tr));
-       return tr;
-}
-
-static inline uint64_t get_cr0(void)
-{
-       uint64_t cr0;
-
-       __asm__ __volatile__("mov %%cr0, %[cr0]"
-                            : /* output */ [cr0]"=r"(cr0));
-       return cr0;
-}
-
-static inline uint64_t get_cr3(void)
-{
-       uint64_t cr3;
-
-       __asm__ __volatile__("mov %%cr3, %[cr3]"
-                            : /* output */ [cr3]"=r"(cr3));
-       return cr3;
-}
-
-static inline uint64_t get_cr4(void)
-{
-       uint64_t cr4;
-
-       __asm__ __volatile__("mov %%cr4, %[cr4]"
-                            : /* output */ [cr4]"=r"(cr4));
-       return cr4;
-}
-
-static inline void set_cr4(uint64_t val)
-{
-       __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
-}
-
-static inline u64 xgetbv(u32 index)
-{
-       u32 eax, edx;
-
-       __asm__ __volatile__("xgetbv;"
-                    : "=a" (eax), "=d" (edx)
-                    : "c" (index));
-       return eax | ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
-       u32 eax = value;
-       u32 edx = value >> 32;
-
-       __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
-}
-
-static inline void wrpkru(u32 pkru)
-{
-       /* Note, ECX and EDX are architecturally required to be '0'. */
-       asm volatile(".byte 0x0f,0x01,0xef\n\t"
-                    : : "a" (pkru), "c"(0), "d"(0));
-}
-
-static inline struct desc_ptr get_gdt(void)
-{
-       struct desc_ptr gdt;
-       __asm__ __volatile__("sgdt %[gdt]"
-                            : /* output */ [gdt]"=m"(gdt));
-       return gdt;
-}
-
-static inline struct desc_ptr get_idt(void)
-{
-       struct desc_ptr idt;
-       __asm__ __volatile__("sidt %[idt]"
-                            : /* output */ [idt]"=m"(idt));
-       return idt;
-}
-
-static inline void outl(uint16_t port, uint32_t value)
-{
-       __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
-}
-
-static inline void __cpuid(uint32_t function, uint32_t index,
-                          uint32_t *eax, uint32_t *ebx,
-                          uint32_t *ecx, uint32_t *edx)
-{
-       *eax = function;
-       *ecx = index;
-
-       asm volatile("cpuid"
-           : "=a" (*eax),
-             "=b" (*ebx),
-             "=c" (*ecx),
-             "=d" (*edx)
-           : "0" (*eax), "2" (*ecx)
-           : "memory");
-}
-
-static inline void cpuid(uint32_t function,
-                        uint32_t *eax, uint32_t *ebx,
-                        uint32_t *ecx, uint32_t *edx)
-{
-       return __cpuid(function, 0, eax, ebx, ecx, edx);
-}
-
-static inline uint32_t this_cpu_fms(void)
-{
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(1, &eax, &ebx, &ecx, &edx);
-       return eax;
-}
-
-static inline uint32_t this_cpu_family(void)
-{
-       return x86_family(this_cpu_fms());
-}
-
-static inline uint32_t this_cpu_model(void)
-{
-       return x86_model(this_cpu_fms());
-}
-
-static inline bool this_cpu_vendor_string_is(const char *vendor)
-{
-       const uint32_t *chunk = (const uint32_t *)vendor;
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(0, &eax, &ebx, &ecx, &edx);
-       return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
-}
-
-static inline bool this_cpu_is_intel(void)
-{
-       return this_cpu_vendor_string_is("GenuineIntel");
-}
-
-/*
- * Exclude early K5 samples with a vendor string of "AMDisbetter!"
- */
-static inline bool this_cpu_is_amd(void)
-{
-       return this_cpu_vendor_string_is("AuthenticAMD");
-}
-
-static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
-                                     uint8_t reg, uint8_t lo, uint8_t hi)
-{
-       uint32_t gprs[4];
-
-       __cpuid(function, index,
-               &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
-               &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
-
-       return (gprs[reg] & GENMASK(hi, lo)) >> lo;
-}
-
-static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       return __this_cpu_has(feature.function, feature.index,
-                             feature.reg, feature.bit, feature.bit);
-}
-
-static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
-{
-       return __this_cpu_has(property.function, property.index,
-                             property.reg, property.lo_bit, property.hi_bit);
-}
-
-static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
-{
-       uint32_t max_leaf;
-
-       switch (property.function & 0xc0000000) {
-       case 0:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
-               break;
-       case 0x40000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
-               break;
-       case 0x80000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
-               break;
-       case 0xc0000000:
-               max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
-       }
-       return max_leaf >= property.function;
-}
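/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * guard a multi-bit property read with the matching *_has_p() check so that
 * CPUID leaves beyond the enumerated maximum are never consumed.  The 36-bit
 * fallback is only a common default, not something mandated by this header.
 */
static inline uint32_t example_cpu_phys_bits(void)
{
	if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
		return 36;

	return this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
}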
-
-static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
-{
-       uint32_t nr_bits;
-
-       if (feature.f.reg == KVM_CPUID_EBX) {
-               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
-       }
-
-       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
-       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
-}
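/*
 * Illustrative sketch (editor's addition): from guest code, assert that the
 * branches-retired architectural event is enumerated.  this_pmu_has() hides
 * the inverted "unavailable bit vs. bit vector length" logic described above.
 */
static inline void example_guest_require_branch_event(void)
{
	GUEST_ASSERT(this_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED));
}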
-
-static __always_inline uint64_t this_cpu_supported_xcr0(void)
-{
-       if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
-               return 0;
-
-       return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
-              ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-typedef u32            __attribute__((vector_size(16))) sse128_t;
-#define __sse128_u     union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
-#define sse128_lo(x)   ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
-#define sse128_hi(x)   ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
-
-static inline void read_sse_reg(int reg, sse128_t *data)
-{
-       switch (reg) {
-       case 0:
-               asm("movdqa %%xmm0, %0" : "=m"(*data));
-               break;
-       case 1:
-               asm("movdqa %%xmm1, %0" : "=m"(*data));
-               break;
-       case 2:
-               asm("movdqa %%xmm2, %0" : "=m"(*data));
-               break;
-       case 3:
-               asm("movdqa %%xmm3, %0" : "=m"(*data));
-               break;
-       case 4:
-               asm("movdqa %%xmm4, %0" : "=m"(*data));
-               break;
-       case 5:
-               asm("movdqa %%xmm5, %0" : "=m"(*data));
-               break;
-       case 6:
-               asm("movdqa %%xmm6, %0" : "=m"(*data));
-               break;
-       case 7:
-               asm("movdqa %%xmm7, %0" : "=m"(*data));
-               break;
-       default:
-               BUG();
-       }
-}
-
-static inline void write_sse_reg(int reg, const sse128_t *data)
-{
-       switch (reg) {
-       case 0:
-               asm("movdqa %0, %%xmm0" : : "m"(*data));
-               break;
-       case 1:
-               asm("movdqa %0, %%xmm1" : : "m"(*data));
-               break;
-       case 2:
-               asm("movdqa %0, %%xmm2" : : "m"(*data));
-               break;
-       case 3:
-               asm("movdqa %0, %%xmm3" : : "m"(*data));
-               break;
-       case 4:
-               asm("movdqa %0, %%xmm4" : : "m"(*data));
-               break;
-       case 5:
-               asm("movdqa %0, %%xmm5" : : "m"(*data));
-               break;
-       case 6:
-               asm("movdqa %0, %%xmm6" : : "m"(*data));
-               break;
-       case 7:
-               asm("movdqa %0, %%xmm7" : : "m"(*data));
-               break;
-       default:
-               BUG();
-       }
-}
-
-static inline void cpu_relax(void)
-{
-       asm volatile("rep; nop" ::: "memory");
-}
-
-static inline void udelay(unsigned long usec)
-{
-       uint64_t start, now, cycles;
-
-       GUEST_ASSERT(guest_tsc_khz);
-       cycles = guest_tsc_khz / 1000 * usec;
-
-       /*
-        * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
-        * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
-        */
-       start = rdtsc();
-       do {
-               now = rdtsc();
-       } while (now - start < cycles);
-}
-
-#define ud2()                  \
-       __asm__ __volatile__(   \
-               "ud2\n" \
-               )
-
-#define hlt()                  \
-       __asm__ __volatile__(   \
-               "hlt\n" \
-               )
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
-void kvm_x86_state_cleanup(struct kvm_x86_state *state);
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void);
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
-uint64_t kvm_get_feature_msr(uint64_t msr_index);
-
-static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
-                                struct kvm_msrs *msrs)
-{
-       int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
-
-       TEST_ASSERT(r == msrs->nmsrs,
-                   "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
-                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
-{
-       int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
-
-       TEST_ASSERT(r == msrs->nmsrs,
-                   "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
-                   r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
-}
-static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
-                                     struct kvm_debugregs *debugregs)
-{
-       vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
-                                     struct kvm_debugregs *debugregs)
-{
-       vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
-}
-static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
-                                 struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
-}
-static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
-                                  struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
-}
-static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
-                                 struct kvm_xsave *xsave)
-{
-       vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
-}
-static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
-                                struct kvm_xcrs *xcrs)
-{
-       vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
-}
-static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
-{
-       vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
-                                              uint32_t function, uint32_t index);
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-
-static inline uint32_t kvm_cpu_fms(void)
-{
-       return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
-}
-
-static inline uint32_t kvm_cpu_family(void)
-{
-       return x86_family(kvm_cpu_fms());
-}
-
-static inline uint32_t kvm_cpu_model(void)
-{
-       return x86_model(kvm_cpu_fms());
-}
-
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
-                  struct kvm_x86_cpu_feature feature);
-
-static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
-                           struct kvm_x86_cpu_property property);
-
-static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
-{
-       return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
-}
-
-static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
-{
-       uint32_t max_leaf;
-
-       switch (property.function & 0xc0000000) {
-       case 0:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
-               break;
-       case 0x40000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
-               break;
-       case 0x80000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
-               break;
-       case 0xc0000000:
-               max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
-       }
-       return max_leaf >= property.function;
-}
-
-static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
-{
-       uint32_t nr_bits;
-
-       if (feature.f.reg == KVM_CPUID_EBX) {
-               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
-       }
-
-       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
-       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
-}
-
-static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
-{
-       if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
-               return 0;
-
-       return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
-              ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
-}
-
-static inline size_t kvm_cpuid2_size(int nr_entries)
-{
-       return sizeof(struct kvm_cpuid2) +
-              sizeof(struct kvm_cpuid_entry2) * nr_entries;
-}
-
-/*
- * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
- * entries sized to hold @nr_entries.  The caller is responsible for freeing
- * the struct.
- */
-static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
-{
-       struct kvm_cpuid2 *cpuid;
-
-       cpuid = malloc(kvm_cpuid2_size(nr_entries));
-       TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
-
-       cpuid->nent = nr_entries;
-
-       return cpuid;
-}
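/*
 * Illustrative sketch (editor's addition): allocate_kvm_cpuid2() only sizes
 * the allocation and sets nent; entry contents are uninitialized and the
 * caller owns (and must free()) the memory.
 */
static inline struct kvm_cpuid2 *example_alloc_one_leaf(uint32_t function)
{
	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(1);

	cpuid->entries[0] = (struct kvm_cpuid_entry2) {
		.function = function,
	};
	return cpuid;
}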
-
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
-
-static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
-                                                             uint32_t function,
-                                                             uint32_t index)
-{
-       return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
-                                                         function, index);
-}
-
-static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
-                                                           uint32_t function)
-{
-       return __vcpu_get_cpuid_entry(vcpu, function, 0);
-}
-
-static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
-       int r;
-
-       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
-       r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-       if (r)
-               return r;
-
-       /* On success, refresh the cache to pick up adjustments made by KVM. */
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-       return 0;
-}
-
-static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
-{
-       TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
-       vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-
-       /* Refresh the cache to pick up adjustments made by KVM. */
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
-                            struct kvm_x86_cpu_property property,
-                            uint32_t value);
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
-
-static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
-                                 struct kvm_x86_cpu_feature feature)
-{
-       struct kvm_cpuid_entry2 *entry;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
-       return *((&entry->eax) + feature.reg) & BIT(feature.bit);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                    struct kvm_x86_cpu_feature feature,
-                                    bool set);
-
-static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
-                                         struct kvm_x86_cpu_feature feature)
-{
-       vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
-}
-
-static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                           struct kvm_x86_cpu_feature feature)
-{
-       vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
-}
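/*
 * Illustrative sketch (editor's addition): hide a CPUID feature from a vCPU
 * whose cached CPUID has already been initialized (e.g. at vCPU creation).
 * The set/clear helper is expected to update the cached entry and push the
 * result to KVM.
 */
static inline void example_hide_la57(struct kvm_vcpu *vcpu)
{
	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_LA57);
}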
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
-
-/*
- * Assert on MSR access(es) and pretty print the MSR name when possible.
- * Note, the caller provides the stringified name so that the name of the macro
- * is printed, not the value the macro resolves to (due to macro expansion).
- */
-#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)                          \
-do {                                                                           \
-       if (__builtin_constant_p(msr)) {                                        \
-               TEST_ASSERT(cond, fmt, str, args);                              \
-       } else if (!(cond)) {                                                   \
-               char buf[16];                                                   \
-                                                                               \
-               snprintf(buf, sizeof(buf), "MSR 0x%x", msr);                    \
-               TEST_ASSERT(cond, fmt, buf, args);                              \
-       }                                                                       \
-} while (0)
-
-/*
- * Returns true if KVM should return the last written value when reading an MSR
- * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
- * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
- * out MSRs that are not durable _and_ that a selftest wants to write.
- */
-static inline bool is_durable_msr(uint32_t msr)
-{
-       return msr != MSR_IA32_TSC;
-}
-
-#define vcpu_set_msr(vcpu, msr, val)                                                   \
-do {                                                                                   \
-       uint64_t r, v = val;                                                            \
-                                                                                       \
-       TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,                               \
-                       "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);      \
-       if (!is_durable_msr(msr))                                                       \
-               break;                                                                  \
-       r = vcpu_get_msr(vcpu, msr);                                                    \
-       TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
-} while (0)
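/*
 * Illustrative sketch (editor's addition): vcpu_set_msr() asserts that the
 * write succeeds and, for durable MSRs, that the value reads back unchanged.
 * The PAT MSR and value below are arbitrary examples.
 */
static inline void example_write_pat(struct kvm_vcpu *vcpu)
{
	vcpu_set_msr(vcpu, MSR_IA32_CR_PAT, 0x0007040600070406ULL);
}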
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
-void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-struct ex_regs {
-       uint64_t rax, rcx, rdx, rbx;
-       uint64_t rbp, rsi, rdi;
-       uint64_t r8, r9, r10, r11;
-       uint64_t r12, r13, r14, r15;
-       uint64_t vector;
-       uint64_t error_code;
-       uint64_t rip;
-       uint64_t cs;
-       uint64_t rflags;
-};
-
-struct idt_entry {
-       uint16_t offset0;
-       uint16_t selector;
-       uint16_t ist : 3;
-       uint16_t : 5;
-       uint16_t type : 4;
-       uint16_t : 1;
-       uint16_t dpl : 2;
-       uint16_t p : 1;
-       uint16_t offset1;
-       uint32_t offset2;
-       uint32_t reserved;
-};
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                       void (*handler)(struct ex_regs *));
-
-/* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
-
-/*
- * KVM selftest exception fixup uses registers to coordinate with the exception
- * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
- * per-CPU data.  Using only registers avoids having to map memory into the
- * guest, doesn't require a valid, stable GS.base, and reduces the risk of
- * recursive faults when accessing memory in the handler.  The downside to
- * using registers is that it restricts what registers can be used by the actual
- * instruction.  But, selftests are 64-bit only, making register pressure a
- * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
- * by the callee, and except for r11 are not implicit parameters to any
- * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
- * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
- * is higher priority than testing non-faulting SYSCALL/SYSRET.
- *
- * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
- * is guaranteed to be non-zero on fault.
- *
- * REGISTER INPUTS:
- * r9  = MAGIC
- * r10 = RIP
- * r11 = new RIP on fault
- *
- * REGISTER OUTPUTS:
- * r9  = exception vector (non-zero)
- * r10 = error code
- */
-#define __KVM_ASM_SAFE(insn, fep)                              \
-       "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
-       "lea 1f(%%rip), %%r10\n\t"                              \
-       "lea 2f(%%rip), %%r11\n\t"                              \
-       fep "1: " insn "\n\t"                                   \
-       "xor %%r9, %%r9\n\t"                                    \
-       "2:\n\t"                                                \
-       "mov  %%r9b, %[vector]\n\t"                             \
-       "mov  %%r10, %[error_code]\n\t"
-
-#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
-#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
-
-#define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
-#define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
-
-#define kvm_asm_safe(insn, inputs...)                                  \
-({                                                                     \
-       uint64_t ign_error_code;                                        \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_ec(insn, error_code, inputs...)                   \
-({                                                                     \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_fep(insn, inputs...)                              \
-({                                                                     \
-       uint64_t ign_error_code;                                        \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE(insn)                                 \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
-
-#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
-({                                                                     \
-       uint8_t vector;                                                 \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
-                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : inputs                                           \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-       vector;                                                         \
-})
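/*
 * Illustrative sketch (editor's addition): from guest code, probe a faulting
 * instruction without dying on the exception.  Loading an all-ones value into
 * CR4 sets reserved bits and is expected to #GP; kvm_asm_safe() returns the
 * vector taken, or 0 if the instruction did not fault.
 */
static inline void example_expect_gp_on_bad_cr4(void)
{
	uint8_t vec = kvm_asm_safe("mov %[cr4], %%cr4",
				   [cr4] "r" ((uint64_t)-1));

	GUEST_ASSERT(vec == GP_VECTOR);
}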
-
-#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
-static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
-{                                                                      \
-       uint64_t error_code;                                            \
-       uint8_t vector;                                                 \
-       uint32_t a, d;                                                  \
-                                                                       \
-       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
-                    : "=a"(a), "=d"(d),                                \
-                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
-                    : "c"(idx)                                         \
-                    : KVM_ASM_SAFE_CLOBBERS);                          \
-                                                                       \
-       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
-       return vector;                                                  \
-}
-
-/*
- * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
- * use ECX as an input index, and EDX:EAX as a 64-bit output.
- */
-#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
-       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
-       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                    \
-
-BUILD_READ_U64_SAFE_HELPERS(rdmsr)
-BUILD_READ_U64_SAFE_HELPERS(rdpmc)
-BUILD_READ_U64_SAFE_HELPERS(xgetbv)
-
-static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
-{
-       return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
-}
-
-static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
-{
-       u32 eax = value;
-       u32 edx = value >> 32;
-
-       return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
-}
-
-bool kvm_is_tdp_enabled(void);
-
-static inline bool kvm_is_pmu_enabled(void)
-{
-       return get_kvm_param_bool("enable_pmu");
-}
-
-static inline bool kvm_is_forced_emulation_enabled(void)
-{
-       return !!get_kvm_param_integer("force_emulation_prefix");
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
-                                   int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
-                      uint64_t a3);
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-
-static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
-                                                    uint64_t size, uint64_t flags)
-{
-       return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
-}
-
-static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
-                                              uint64_t flags)
-{
-       uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
-
-       GUEST_ASSERT(!ret);
-}
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
-
-#define vm_xsave_require_permission(xfeature)  \
-       __vm_xsave_require_permission(xfeature, #xfeature)
-
-enum pg_level {
-       PG_LEVEL_NONE,
-       PG_LEVEL_4K,
-       PG_LEVEL_2M,
-       PG_LEVEL_1G,
-       PG_LEVEL_512G,
-       PG_LEVEL_NUM
-};
-
-#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
-#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
-
-#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
-#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
-#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                   uint64_t nr_bytes, int level);
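/*
 * Illustrative sketch (editor's addition): install a single 2MiB mapping
 * instead of 512 4KiB PTEs.  @gva and @gpa are assumed to be 2MiB-aligned.
 */
static inline void example_map_2m(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
	__virt_pg_map(vm, gva, gpa, PG_LEVEL_2M);
}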
-
-/*
- * Basic CPU control in CR0
- */
-#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
-#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
-#define X86_CR0_EM          (1UL<<2) /* Emulation */
-#define X86_CR0_TS          (1UL<<3) /* Task Switched */
-#define X86_CR0_ET          (1UL<<4) /* Extension Type */
-#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
-#define X86_CR0_WP          (1UL<<16) /* Write Protect */
-#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
-#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
-#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
-#define X86_CR0_PG          (1UL<<31) /* Paging */
-
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK     BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK       BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK                BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK                BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK       BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK          BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK         BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK  BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS  BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
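/*
 * Illustrative sketch (editor's addition): the error code expected for a
 * user-mode write fault on a present mapping, composed from the masks above.
 */
#define EXAMPLE_PFERR_USER_WRITE \
	(PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK)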
-
-bool sys_clocksource_is_based_on_tsc(void);
-
-#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
deleted file mode 100644 (file)
index 82c11c8..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Helpers used for SEV guests
- */
-#ifndef SELFTEST_KVM_SEV_H
-#define SELFTEST_KVM_SEV_H
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "linux/psp-sev.h"
-
-#include "kvm_util.h"
-#include "svm_util.h"
-#include "processor.h"
-
-enum sev_guest_state {
-       SEV_GUEST_STATE_UNINITIALIZED = 0,
-       SEV_GUEST_STATE_LAUNCH_UPDATE,
-       SEV_GUEST_STATE_LAUNCH_SECRET,
-       SEV_GUEST_STATE_RUNNING,
-};
-
-#define SEV_POLICY_NO_DBG      (1UL << 0)
-#define SEV_POLICY_ES          (1UL << 2)
-
-#define GHCB_MSR_TERM_REQ      0x100
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
-void sev_vm_launch_finish(struct kvm_vm *vm);
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
-                                          struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
-
-kvm_static_assert(SEV_RET_SUCCESS == 0);
-
-/*
- * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
- * instead of a proper struct.  The size of the parameter is embedded in the
- * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
- * creating an overlay to pass in an "unsigned long" without a cast (casting
- * will make the compiler unhappy due to dereferencing an aliased pointer).
- */
-#define __vm_sev_ioctl(vm, cmd, arg)                                   \
-({                                                                     \
-       int r;                                                          \
-                                                                       \
-       union {                                                         \
-               struct kvm_sev_cmd c;                                   \
-               unsigned long raw;                                      \
-       } sev_cmd = { .c = {                                            \
-               .id = (cmd),                                            \
-               .data = (uint64_t)(arg),                                \
-               .sev_fd = (vm)->arch.sev_fd,                            \
-       } };                                                            \
-                                                                       \
-       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
-       r ?: sev_cmd.c.error;                                           \
-})
-
-#define vm_sev_ioctl(vm, cmd, arg)                                     \
-({                                                                     \
-       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
-                                                                       \
-       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
-})
-
-void sev_vm_init(struct kvm_vm *vm);
-void sev_es_vm_init(struct kvm_vm *vm);
-
-static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
-                                                struct userspace_mem_region *region)
-{
-       struct kvm_enc_region range = {
-               .addr = region->region.userspace_addr,
-               .size = region->region.memory_size,
-       };
-
-       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
-}
-
-static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
-                                         uint64_t size)
-{
-       struct kvm_sev_launch_update_data update_data = {
-               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
-               .len = size,
-       };
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
-}
-
-#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
deleted file mode 100644 (file)
index 4803e10..0000000
+++ /dev/null
@@ -1,326 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
-#ifndef SELFTEST_KVM_SVM_H
-#define SELFTEST_KVM_SVM_H
-
-enum {
-       INTERCEPT_INTR,
-       INTERCEPT_NMI,
-       INTERCEPT_SMI,
-       INTERCEPT_INIT,
-       INTERCEPT_VINTR,
-       INTERCEPT_SELECTIVE_CR0,
-       INTERCEPT_STORE_IDTR,
-       INTERCEPT_STORE_GDTR,
-       INTERCEPT_STORE_LDTR,
-       INTERCEPT_STORE_TR,
-       INTERCEPT_LOAD_IDTR,
-       INTERCEPT_LOAD_GDTR,
-       INTERCEPT_LOAD_LDTR,
-       INTERCEPT_LOAD_TR,
-       INTERCEPT_RDTSC,
-       INTERCEPT_RDPMC,
-       INTERCEPT_PUSHF,
-       INTERCEPT_POPF,
-       INTERCEPT_CPUID,
-       INTERCEPT_RSM,
-       INTERCEPT_IRET,
-       INTERCEPT_INTn,
-       INTERCEPT_INVD,
-       INTERCEPT_PAUSE,
-       INTERCEPT_HLT,
-       INTERCEPT_INVLPG,
-       INTERCEPT_INVLPGA,
-       INTERCEPT_IOIO_PROT,
-       INTERCEPT_MSR_PROT,
-       INTERCEPT_TASK_SWITCH,
-       INTERCEPT_FERR_FREEZE,
-       INTERCEPT_SHUTDOWN,
-       INTERCEPT_VMRUN,
-       INTERCEPT_VMMCALL,
-       INTERCEPT_VMLOAD,
-       INTERCEPT_VMSAVE,
-       INTERCEPT_STGI,
-       INTERCEPT_CLGI,
-       INTERCEPT_SKINIT,
-       INTERCEPT_RDTSCP,
-       INTERCEPT_ICEBP,
-       INTERCEPT_WBINVD,
-       INTERCEPT_MONITOR,
-       INTERCEPT_MWAIT,
-       INTERCEPT_MWAIT_COND,
-       INTERCEPT_XSETBV,
-       INTERCEPT_RDPRU,
-};
-
-struct hv_vmcb_enlightenments {
-       struct __packed hv_enlightenments_control {
-               u32 nested_flush_hypercall:1;
-               u32 msr_bitmap:1;
-               u32 enlightened_npt_tlb: 1;
-               u32 reserved:29;
-       } __packed hv_enlightenments_control;
-       u32 hv_vp_id;
-       u64 hv_vm_id;
-       u64 partition_assist_page;
-       u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
-
-/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL                    0xf0000000
-#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH   (1)
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
-       u32 intercept_cr;
-       u32 intercept_dr;
-       u32 intercept_exceptions;
-       u64 intercept;
-       u8 reserved_1[40];
-       u16 pause_filter_thresh;
-       u16 pause_filter_count;
-       u64 iopm_base_pa;
-       u64 msrpm_base_pa;
-       u64 tsc_offset;
-       u32 asid;
-       u8 tlb_ctl;
-       u8 reserved_2[3];
-       u32 int_ctl;
-       u32 int_vector;
-       u32 int_state;
-       u8 reserved_3[4];
-       u32 exit_code;
-       u32 exit_code_hi;
-       u64 exit_info_1;
-       u64 exit_info_2;
-       u32 exit_int_info;
-       u32 exit_int_info_err;
-       u64 nested_ctl;
-       u64 avic_vapic_bar;
-       u8 reserved_4[8];
-       u32 event_inj;
-       u32 event_inj_err;
-       u64 nested_cr3;
-       u64 virt_ext;
-       u32 clean;
-       u32 reserved_5;
-       u64 next_rip;
-       u8 insn_len;
-       u8 insn_bytes[15];
-       u64 avic_backing_page;  /* Offset 0xe0 */
-       u8 reserved_6[8];       /* Offset 0xe8 */
-       u64 avic_logical_id;    /* Offset 0xf0 */
-       u64 avic_physical_id;   /* Offset 0xf8 */
-       u8 reserved_7[8];
-       u64 vmsa_pa;            /* Used for an SEV-ES guest */
-       u8 reserved_8[720];
-       /*
-        * Offset 0x3e0, 32 bytes reserved
-        * for use by hypervisor/software.
-        */
-       union {
-               struct hv_vmcb_enlightenments hv_enlightenments;
-               u8 reserved_sw[32];
-       };
-};
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-#define TLB_CONTROL_FLUSH_ASID 3
-#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_GIF_SHIFT 9
-#define V_GIF_MASK (1 << V_GIF_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define V_GIF_ENABLE_SHIFT 25
-#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
-
-#define AVIC_ENABLE_SHIFT 31
-#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
-
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-#define SVM_VM_CR_VALID_MASK   0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
-
-#define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
-
-struct __attribute__ ((__packed__)) vmcb_seg {
-       u16 selector;
-       u16 attrib;
-       u32 limit;
-       u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
-       struct vmcb_seg es;
-       struct vmcb_seg cs;
-       struct vmcb_seg ss;
-       struct vmcb_seg ds;
-       struct vmcb_seg fs;
-       struct vmcb_seg gs;
-       struct vmcb_seg gdtr;
-       struct vmcb_seg ldtr;
-       struct vmcb_seg idtr;
-       struct vmcb_seg tr;
-       u8 reserved_1[43];
-       u8 cpl;
-       u8 reserved_2[4];
-       u64 efer;
-       u8 reserved_3[112];
-       u64 cr4;
-       u64 cr3;
-       u64 cr0;
-       u64 dr7;
-       u64 dr6;
-       u64 rflags;
-       u64 rip;
-       u8 reserved_4[88];
-       u64 rsp;
-       u8 reserved_5[24];
-       u64 rax;
-       u64 star;
-       u64 lstar;
-       u64 cstar;
-       u64 sfmask;
-       u64 kernel_gs_base;
-       u64 sysenter_cs;
-       u64 sysenter_esp;
-       u64 sysenter_eip;
-       u64 cr2;
-       u8 reserved_6[32];
-       u64 g_pat;
-       u64 dbgctl;
-       u64 br_from;
-       u64 br_to;
-       u64 last_excp_from;
-       u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
-       struct vmcb_control_area control;
-       struct vmcb_save_area save;
-};
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_READ     0
-#define INTERCEPT_CR3_READ     3
-#define INTERCEPT_CR4_READ     4
-#define INTERCEPT_CR8_READ     8
-#define INTERCEPT_CR0_WRITE    (16 + 0)
-#define INTERCEPT_CR3_WRITE    (16 + 3)
-#define INTERCEPT_CR4_WRITE    (16 + 4)
-#define INTERCEPT_CR8_WRITE    (16 + 8)
-
-#define INTERCEPT_DR0_READ     0
-#define INTERCEPT_DR1_READ     1
-#define INTERCEPT_DR2_READ     2
-#define INTERCEPT_DR3_READ     3
-#define INTERCEPT_DR4_READ     4
-#define INTERCEPT_DR5_READ     5
-#define INTERCEPT_DR6_READ     6
-#define INTERCEPT_DR7_READ     7
-#define INTERCEPT_DR0_WRITE    (16 + 0)
-#define INTERCEPT_DR1_WRITE    (16 + 1)
-#define INTERCEPT_DR2_WRITE    (16 + 2)
-#define INTERCEPT_DR3_WRITE    (16 + 3)
-#define INTERCEPT_DR4_WRITE    (16 + 4)
-#define INTERCEPT_DR5_WRITE    (16 + 5)
-#define INTERCEPT_DR6_WRITE    (16 + 6)
-#define INTERCEPT_DR7_WRITE    (16 + 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
-
-#define        SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define        SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define        SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define        SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
-
-#define SVM_EXITINFO_REG_MASK 0x0F
-
-#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
-
-#endif /* SELFTEST_KVM_SVM_H */
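For reference, the SVM_SELECTOR_* shifts and masks in the header above describe the packed attribute encoding used by the vmcb_seg "attrib" field. A minimal sketch of composing the attribute for a present, 64-bit, DPL-0 code segment; the helper name is hypothetical and the exact value a given test needs depends on its guest setup:

/*
 * Illustrative only: build a vmcb_seg attrib value from the
 * SVM_SELECTOR_* defines above.
 */
static inline uint16_t example_code_seg_attrib(void)
{
	return SVM_SELECTOR_CODE_MASK |	/* type: executable code segment */
	       SVM_SELECTOR_READ_MASK |	/* ...that is also readable */
	       SVM_SELECTOR_S_MASK |	/* non-system (code/data) descriptor */
	       SVM_SELECTOR_P_MASK |	/* present */
	       SVM_SELECTOR_L_MASK;	/* 64-bit (long mode) code */
}
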
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
deleted file mode 100644 (file)
index 044f0f8..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
- * Header for nested SVM testing
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#ifndef SELFTEST_KVM_SVM_UTILS_H
-#define SELFTEST_KVM_SVM_UTILS_H
-
-#include <asm/svm.h>
-
-#include <stdint.h>
-#include "svm.h"
-#include "processor.h"
-
-struct svm_test_data {
-       /* VMCB */
-       struct vmcb *vmcb; /* gva */
-       void *vmcb_hva;
-       uint64_t vmcb_gpa;
-
-       /* host state-save area */
-       struct vmcb_save_area *save_area; /* gva */
-       void *save_area_hva;
-       uint64_t save_area_gpa;
-
-       /* MSR-Bitmap */
-       void *msr; /* gva */
-       void *msr_hva;
-       uint64_t msr_gpa;
-};
-
-static inline void vmmcall(void)
-{
-       /*
-        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
-        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
-        * use of this function is to exit to L1 from L2.  Clobber all other
-        * GPRs as L1 doesn't correctly preserve them during vmexits.
-        */
-       __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
-                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                              "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-#define stgi()                 \
-       __asm__ __volatile__(   \
-               "stgi\n"        \
-               )
-
-#define clgi()                 \
-       __asm__ __volatile__(   \
-               "clgi\n"        \
-               )
-
-struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-
-int open_sev_dev_path_or_exit(void);
-
-#endif /* SELFTEST_KVM_SVM_UTILS_H */
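These helpers are consumed in two halves: the host side allocates the SVM pages with vcpu_alloc_svm() and passes the returned GVA to the guest, while the L1 guest calls generic_svm_setup() and run_guest() to enter L2. A rough sketch of that flow; the guest/setup function names are hypothetical, only the library calls come from the header above:

#include "kvm_util.h"
#include "svm_util.h"
#include "ucall_common.h"

/* Hypothetical L2 guest: immediately exit back to L1. */
static void l2_guest_code(void)
{
	vmmcall();
}

/* Hypothetical L1 guest: run L2 once and check the exit reason. */
static void l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_stack[128];

	generic_svm_setup(svm, l2_guest_code, &l2_stack[128]);
	run_guest(svm->vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_DONE();
}

/* Host side: allocate the SVM pages and hand their GVA to L1. */
static void setup_l1(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_vaddr_t svm_gva;

	vcpu_alloc_svm(vm, &svm_gva);
	vcpu_args_set(vcpu, 1, svm_gva);
}
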
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
deleted file mode 100644 (file)
index d3825dc..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_UCALL_H
-#define SELFTEST_KVM_UCALL_H
-
-#include "kvm_util.h"
-
-#define UCALL_EXIT_REASON       KVM_EXIT_IO
-
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-#endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
deleted file mode 100644 (file)
index 5f0c0a2..0000000
+++ /dev/null
@@ -1,577 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_VMX_H
-#define SELFTEST_KVM_VMX_H
-
-#include <asm/vmx.h>
-
-#include <stdint.h>
-#include "processor.h"
-#include "apic.h"
-
-/*
- * Definitions of Primary Processor-Based VM-Execution Controls.
- */
-#define CPU_BASED_INTR_WINDOW_EXITING          0x00000004
-#define CPU_BASED_USE_TSC_OFFSETTING           0x00000008
-#define CPU_BASED_HLT_EXITING                  0x00000080
-#define CPU_BASED_INVLPG_EXITING               0x00000200
-#define CPU_BASED_MWAIT_EXITING                        0x00000400
-#define CPU_BASED_RDPMC_EXITING                        0x00000800
-#define CPU_BASED_RDTSC_EXITING                        0x00001000
-#define CPU_BASED_CR3_LOAD_EXITING             0x00008000
-#define CPU_BASED_CR3_STORE_EXITING            0x00010000
-#define CPU_BASED_CR8_LOAD_EXITING             0x00080000
-#define CPU_BASED_CR8_STORE_EXITING            0x00100000
-#define CPU_BASED_TPR_SHADOW                   0x00200000
-#define CPU_BASED_NMI_WINDOW_EXITING           0x00400000
-#define CPU_BASED_MOV_DR_EXITING               0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING            0x01000000
-#define CPU_BASED_USE_IO_BITMAPS               0x02000000
-#define CPU_BASED_MONITOR_TRAP                 0x08000000
-#define CPU_BASED_USE_MSR_BITMAPS              0x10000000
-#define CPU_BASED_MONITOR_EXITING              0x20000000
-#define CPU_BASED_PAUSE_EXITING                        0x40000000
-#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS  0x80000000
-
-#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x0401e172
-
-/*
- * Definitions of Secondary Processor-Based VM-Execution Controls.
- */
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-#define SECONDARY_EXEC_ENABLE_EPT              0x00000002
-#define SECONDARY_EXEC_DESC                    0x00000004
-#define SECONDARY_EXEC_ENABLE_RDTSCP           0x00000008
-#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE  0x00000010
-#define SECONDARY_EXEC_ENABLE_VPID             0x00000020
-#define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
-#define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
-#define SECONDARY_EXEC_APIC_REGISTER_VIRT      0x00000100
-#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY   0x00000200
-#define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
-#define SECONDARY_EXEC_RDRAND_EXITING          0x00000800
-#define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
-#define SECONDARY_EXEC_ENABLE_VMFUNC           0x00002000
-#define SECONDARY_EXEC_SHADOW_VMCS             0x00004000
-#define SECONDARY_EXEC_RDSEED_EXITING          0x00010000
-#define SECONDARY_EXEC_ENABLE_PML              0x00020000
-#define SECONDARY_EPT_VE                       0x00040000
-#define SECONDARY_ENABLE_XSAV_RESTORE          0x00100000
-#define SECONDARY_EXEC_TSC_SCALING             0x02000000
-
-#define PIN_BASED_EXT_INTR_MASK                        0x00000001
-#define PIN_BASED_NMI_EXITING                  0x00000008
-#define PIN_BASED_VIRTUAL_NMIS                 0x00000020
-#define PIN_BASED_VMX_PREEMPTION_TIMER         0x00000040
-#define PIN_BASED_POSTED_INTR                  0x00000080
-
-#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR    0x00000016
-
-#define VM_EXIT_SAVE_DEBUG_CONTROLS            0x00000004
-#define VM_EXIT_HOST_ADDR_SPACE_SIZE           0x00000200
-#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL     0x00001000
-#define VM_EXIT_ACK_INTR_ON_EXIT               0x00008000
-#define VM_EXIT_SAVE_IA32_PAT                  0x00040000
-#define VM_EXIT_LOAD_IA32_PAT                  0x00080000
-#define VM_EXIT_SAVE_IA32_EFER                 0x00100000
-#define VM_EXIT_LOAD_IA32_EFER                 0x00200000
-#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER      0x00400000
-
-#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
-
-#define VM_ENTRY_LOAD_DEBUG_CONTROLS           0x00000004
-#define VM_ENTRY_IA32E_MODE                    0x00000200
-#define VM_ENTRY_SMM                           0x00000400
-#define VM_ENTRY_DEACT_DUAL_MONITOR            0x00000800
-#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL    0x00002000
-#define VM_ENTRY_LOAD_IA32_PAT                 0x00004000
-#define VM_ENTRY_LOAD_IA32_EFER                        0x00008000
-
-#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR     0x000011ff
-
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA                 0x00000020
-
-#define VMX_EPT_VPID_CAP_1G_PAGES              0x00020000
-#define VMX_EPT_VPID_CAP_AD_BITS               0x00200000
-
-#define EXIT_REASON_FAILED_VMENTRY     0x80000000
-
-enum vmcs_field {
-       VIRTUAL_PROCESSOR_ID            = 0x00000000,
-       POSTED_INTR_NV                  = 0x00000002,
-       GUEST_ES_SELECTOR               = 0x00000800,
-       GUEST_CS_SELECTOR               = 0x00000802,
-       GUEST_SS_SELECTOR               = 0x00000804,
-       GUEST_DS_SELECTOR               = 0x00000806,
-       GUEST_FS_SELECTOR               = 0x00000808,
-       GUEST_GS_SELECTOR               = 0x0000080a,
-       GUEST_LDTR_SELECTOR             = 0x0000080c,
-       GUEST_TR_SELECTOR               = 0x0000080e,
-       GUEST_INTR_STATUS               = 0x00000810,
-       GUEST_PML_INDEX                 = 0x00000812,
-       HOST_ES_SELECTOR                = 0x00000c00,
-       HOST_CS_SELECTOR                = 0x00000c02,
-       HOST_SS_SELECTOR                = 0x00000c04,
-       HOST_DS_SELECTOR                = 0x00000c06,
-       HOST_FS_SELECTOR                = 0x00000c08,
-       HOST_GS_SELECTOR                = 0x00000c0a,
-       HOST_TR_SELECTOR                = 0x00000c0c,
-       IO_BITMAP_A                     = 0x00002000,
-       IO_BITMAP_A_HIGH                = 0x00002001,
-       IO_BITMAP_B                     = 0x00002002,
-       IO_BITMAP_B_HIGH                = 0x00002003,
-       MSR_BITMAP                      = 0x00002004,
-       MSR_BITMAP_HIGH                 = 0x00002005,
-       VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
-       VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
-       VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
-       VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
-       VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
-       VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
-       PML_ADDRESS                     = 0x0000200e,
-       PML_ADDRESS_HIGH                = 0x0000200f,
-       TSC_OFFSET                      = 0x00002010,
-       TSC_OFFSET_HIGH                 = 0x00002011,
-       VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
-       VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
-       APIC_ACCESS_ADDR                = 0x00002014,
-       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
-       POSTED_INTR_DESC_ADDR           = 0x00002016,
-       POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
-       EPT_POINTER                     = 0x0000201a,
-       EPT_POINTER_HIGH                = 0x0000201b,
-       EOI_EXIT_BITMAP0                = 0x0000201c,
-       EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
-       EOI_EXIT_BITMAP1                = 0x0000201e,
-       EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
-       EOI_EXIT_BITMAP2                = 0x00002020,
-       EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
-       EOI_EXIT_BITMAP3                = 0x00002022,
-       EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
-       VMREAD_BITMAP                   = 0x00002026,
-       VMREAD_BITMAP_HIGH              = 0x00002027,
-       VMWRITE_BITMAP                  = 0x00002028,
-       VMWRITE_BITMAP_HIGH             = 0x00002029,
-       XSS_EXIT_BITMAP                 = 0x0000202C,
-       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
-       ENCLS_EXITING_BITMAP            = 0x0000202E,
-       ENCLS_EXITING_BITMAP_HIGH       = 0x0000202F,
-       TSC_MULTIPLIER                  = 0x00002032,
-       TSC_MULTIPLIER_HIGH             = 0x00002033,
-       GUEST_PHYSICAL_ADDRESS          = 0x00002400,
-       GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
-       VMCS_LINK_POINTER               = 0x00002800,
-       VMCS_LINK_POINTER_HIGH          = 0x00002801,
-       GUEST_IA32_DEBUGCTL             = 0x00002802,
-       GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
-       GUEST_IA32_PAT                  = 0x00002804,
-       GUEST_IA32_PAT_HIGH             = 0x00002805,
-       GUEST_IA32_EFER                 = 0x00002806,
-       GUEST_IA32_EFER_HIGH            = 0x00002807,
-       GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
-       GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
-       GUEST_PDPTR0                    = 0x0000280a,
-       GUEST_PDPTR0_HIGH               = 0x0000280b,
-       GUEST_PDPTR1                    = 0x0000280c,
-       GUEST_PDPTR1_HIGH               = 0x0000280d,
-       GUEST_PDPTR2                    = 0x0000280e,
-       GUEST_PDPTR2_HIGH               = 0x0000280f,
-       GUEST_PDPTR3                    = 0x00002810,
-       GUEST_PDPTR3_HIGH               = 0x00002811,
-       GUEST_BNDCFGS                   = 0x00002812,
-       GUEST_BNDCFGS_HIGH              = 0x00002813,
-       HOST_IA32_PAT                   = 0x00002c00,
-       HOST_IA32_PAT_HIGH              = 0x00002c01,
-       HOST_IA32_EFER                  = 0x00002c02,
-       HOST_IA32_EFER_HIGH             = 0x00002c03,
-       HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
-       HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
-       PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
-       CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
-       EXCEPTION_BITMAP                = 0x00004004,
-       PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
-       PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
-       CR3_TARGET_COUNT                = 0x0000400a,
-       VM_EXIT_CONTROLS                = 0x0000400c,
-       VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
-       VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
-       VM_ENTRY_CONTROLS               = 0x00004012,
-       VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
-       VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
-       VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-       VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
-       TPR_THRESHOLD                   = 0x0000401c,
-       SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
-       PLE_GAP                         = 0x00004020,
-       PLE_WINDOW                      = 0x00004022,
-       VM_INSTRUCTION_ERROR            = 0x00004400,
-       VM_EXIT_REASON                  = 0x00004402,
-       VM_EXIT_INTR_INFO               = 0x00004404,
-       VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
-       IDT_VECTORING_INFO_FIELD        = 0x00004408,
-       IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-       VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
-       VMX_INSTRUCTION_INFO            = 0x0000440e,
-       GUEST_ES_LIMIT                  = 0x00004800,
-       GUEST_CS_LIMIT                  = 0x00004802,
-       GUEST_SS_LIMIT                  = 0x00004804,
-       GUEST_DS_LIMIT                  = 0x00004806,
-       GUEST_FS_LIMIT                  = 0x00004808,
-       GUEST_GS_LIMIT                  = 0x0000480a,
-       GUEST_LDTR_LIMIT                = 0x0000480c,
-       GUEST_TR_LIMIT                  = 0x0000480e,
-       GUEST_GDTR_LIMIT                = 0x00004810,
-       GUEST_IDTR_LIMIT                = 0x00004812,
-       GUEST_ES_AR_BYTES               = 0x00004814,
-       GUEST_CS_AR_BYTES               = 0x00004816,
-       GUEST_SS_AR_BYTES               = 0x00004818,
-       GUEST_DS_AR_BYTES               = 0x0000481a,
-       GUEST_FS_AR_BYTES               = 0x0000481c,
-       GUEST_GS_AR_BYTES               = 0x0000481e,
-       GUEST_LDTR_AR_BYTES             = 0x00004820,
-       GUEST_TR_AR_BYTES               = 0x00004822,
-       GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-       GUEST_ACTIVITY_STATE            = 0X00004826,
-       GUEST_SYSENTER_CS               = 0x0000482A,
-       VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
-       HOST_IA32_SYSENTER_CS           = 0x00004c00,
-       CR0_GUEST_HOST_MASK             = 0x00006000,
-       CR4_GUEST_HOST_MASK             = 0x00006002,
-       CR0_READ_SHADOW                 = 0x00006004,
-       CR4_READ_SHADOW                 = 0x00006006,
-       CR3_TARGET_VALUE0               = 0x00006008,
-       CR3_TARGET_VALUE1               = 0x0000600a,
-       CR3_TARGET_VALUE2               = 0x0000600c,
-       CR3_TARGET_VALUE3               = 0x0000600e,
-       EXIT_QUALIFICATION              = 0x00006400,
-       GUEST_LINEAR_ADDRESS            = 0x0000640a,
-       GUEST_CR0                       = 0x00006800,
-       GUEST_CR3                       = 0x00006802,
-       GUEST_CR4                       = 0x00006804,
-       GUEST_ES_BASE                   = 0x00006806,
-       GUEST_CS_BASE                   = 0x00006808,
-       GUEST_SS_BASE                   = 0x0000680a,
-       GUEST_DS_BASE                   = 0x0000680c,
-       GUEST_FS_BASE                   = 0x0000680e,
-       GUEST_GS_BASE                   = 0x00006810,
-       GUEST_LDTR_BASE                 = 0x00006812,
-       GUEST_TR_BASE                   = 0x00006814,
-       GUEST_GDTR_BASE                 = 0x00006816,
-       GUEST_IDTR_BASE                 = 0x00006818,
-       GUEST_DR7                       = 0x0000681a,
-       GUEST_RSP                       = 0x0000681c,
-       GUEST_RIP                       = 0x0000681e,
-       GUEST_RFLAGS                    = 0x00006820,
-       GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
-       GUEST_SYSENTER_ESP              = 0x00006824,
-       GUEST_SYSENTER_EIP              = 0x00006826,
-       HOST_CR0                        = 0x00006c00,
-       HOST_CR3                        = 0x00006c02,
-       HOST_CR4                        = 0x00006c04,
-       HOST_FS_BASE                    = 0x00006c06,
-       HOST_GS_BASE                    = 0x00006c08,
-       HOST_TR_BASE                    = 0x00006c0a,
-       HOST_GDTR_BASE                  = 0x00006c0c,
-       HOST_IDTR_BASE                  = 0x00006c0e,
-       HOST_IA32_SYSENTER_ESP          = 0x00006c10,
-       HOST_IA32_SYSENTER_EIP          = 0x00006c12,
-       HOST_RSP                        = 0x00006c14,
-       HOST_RIP                        = 0x00006c16,
-};
-
-struct vmx_msr_entry {
-       uint32_t index;
-       uint32_t reserved;
-       uint64_t value;
-} __attribute__ ((aligned(16)));
-
-#include "evmcs.h"
-
-static inline int vmxon(uint64_t phys)
-{
-       uint8_t ret;
-
-       __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(phys)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline void vmxoff(void)
-{
-       __asm__ __volatile__("vmxoff");
-}
-
-static inline int vmclear(uint64_t vmcs_pa)
-{
-       uint8_t ret;
-
-       __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(vmcs_pa)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline int vmptrld(uint64_t vmcs_pa)
-{
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return -1;
-
-       __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [pa]"m"(vmcs_pa)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline int vmptrst(uint64_t *value)
-{
-       uint64_t tmp;
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmptrst(value);
-
-       __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
-               : [value]"=m"(tmp), [ret]"=rm"(ret)
-               : : "cc", "memory");
-
-       *value = tmp;
-       return ret;
-}
-
-/*
- * A wrapper around vmptrst that ignores errors and returns zero if the
- * vmptrst instruction fails.
- */
-static inline uint64_t vmptrstz(void)
-{
-       uint64_t value = 0;
-       vmptrst(&value);
-       return value;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmlaunch.
- */
-static inline int vmlaunch(void)
-{
-       int ret;
-
-       if (enable_evmcs)
-               return evmcs_vmlaunch();
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "vmwrite %%rsp, %[host_rsp];"
-                            "lea 1f(%%rip), %%rax;"
-                            "vmwrite %%rax, %[host_rip];"
-                            "vmlaunch;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"((uint64_t)HOST_RSP),
-                              [host_rip]"r"((uint64_t)HOST_RIP)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-/*
- * No guest state (e.g. GPRs) is established by this vmresume.
- */
-static inline int vmresume(void)
-{
-       int ret;
-
-       if (enable_evmcs)
-               return evmcs_vmresume();
-
-       __asm__ __volatile__("push %%rbp;"
-                            "push %%rcx;"
-                            "push %%rdx;"
-                            "push %%rsi;"
-                            "push %%rdi;"
-                            "push $0;"
-                            "vmwrite %%rsp, %[host_rsp];"
-                            "lea 1f(%%rip), %%rax;"
-                            "vmwrite %%rax, %[host_rip];"
-                            "vmresume;"
-                            "incq (%%rsp);"
-                            "1: pop %%rax;"
-                            "pop %%rdi;"
-                            "pop %%rsi;"
-                            "pop %%rdx;"
-                            "pop %%rcx;"
-                            "pop %%rbp;"
-                            : [ret]"=&a"(ret)
-                            : [host_rsp]"r"((uint64_t)HOST_RSP),
-                              [host_rip]"r"((uint64_t)HOST_RIP)
-                            : "memory", "cc", "rbx", "r8", "r9", "r10",
-                              "r11", "r12", "r13", "r14", "r15");
-       return ret;
-}
-
-static inline void vmcall(void)
-{
-       /*
-        * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
-        * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
-        * use of this function is to exit to L1 from L2.  Clobber all other
-        * GPRs as L1 doesn't correctly preserve them during vmexits.
-        */
-       __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
-                            : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                            : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                              "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-static inline int vmread(uint64_t encoding, uint64_t *value)
-{
-       uint64_t tmp;
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmread(encoding, value);
-
-       __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
-               : [value]"=rm"(tmp), [ret]"=rm"(ret)
-               : [encoding]"r"(encoding)
-               : "cc", "memory");
-
-       *value = tmp;
-       return ret;
-}
-
-/*
- * A wrapper around vmread that ignores errors and returns zero if the
- * vmread instruction fails.
- */
-static inline uint64_t vmreadz(uint64_t encoding)
-{
-       uint64_t value = 0;
-       vmread(encoding, &value);
-       return value;
-}
-
-static inline int vmwrite(uint64_t encoding, uint64_t value)
-{
-       uint8_t ret;
-
-       if (enable_evmcs)
-               return evmcs_vmwrite(encoding, value);
-
-       __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
-               : [ret]"=rm"(ret)
-               : [value]"rm"(value), [encoding]"r"(encoding)
-               : "cc", "memory");
-
-       return ret;
-}
-
-static inline uint32_t vmcs_revision(void)
-{
-       return rdmsr(MSR_IA32_VMX_BASIC);
-}
-
-struct vmx_pages {
-       void *vmxon_hva;
-       uint64_t vmxon_gpa;
-       void *vmxon;
-
-       void *vmcs_hva;
-       uint64_t vmcs_gpa;
-       void *vmcs;
-
-       void *msr_hva;
-       uint64_t msr_gpa;
-       void *msr;
-
-       void *shadow_vmcs_hva;
-       uint64_t shadow_vmcs_gpa;
-       void *shadow_vmcs;
-
-       void *vmread_hva;
-       uint64_t vmread_gpa;
-       void *vmread;
-
-       void *vmwrite_hva;
-       uint64_t vmwrite_gpa;
-       void *vmwrite;
-
-       void *eptp_hva;
-       uint64_t eptp_gpa;
-       void *eptp;
-
-       void *apic_access_hva;
-       uint64_t apic_access_gpa;
-       void *apic_access;
-};
-
-union vmx_basic {
-       u64 val;
-       struct {
-               u32 revision;
-               u32     size:13,
-                       reserved1:3,
-                       width:1,
-                       dual:1,
-                       type:4,
-                       insouts:1,
-                       ctrl:1,
-                       vm_entry_exception_ctrl:1,
-                       reserved2:7;
-       };
-};
-
-union vmx_ctrl_msr {
-       u64 val;
-       struct {
-               u32 set, clr;
-       };
-};
-
-struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
-bool prepare_for_vmx_operation(struct vmx_pages *vmx);
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
-bool load_vmcs(struct vmx_pages *vmx);
-
-bool ept_1g_pages_supported(void);
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                  uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-                       uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
-                           uint64_t addr, uint64_t size);
-bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
-
-#endif /* SELFTEST_KVM_VMX_H */
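The prototypes at the end of the header follow the same host/guest split as the SVM helpers: vcpu_alloc_vmx() runs in the host to populate struct vmx_pages, while the L1 guest uses prepare_for_vmx_operation(), load_vmcs() and prepare_vmcs() before executing vmlaunch(). A hedged sketch of the minimal flow; names other than the library calls are hypothetical:

#include "kvm_util.h"
#include "ucall_common.h"
#include "vmx.h"

static void l2_guest_code(void)
{
	vmcall();	/* exit back to L1 */
}

/* Hypothetical L1 guest: enter VMX operation and launch L2 once. */
static void l1_guest_code(struct vmx_pages *vmx)
{
	unsigned long l2_stack[128];

	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
	GUEST_ASSERT(load_vmcs(vmx));

	prepare_vmcs(vmx, l2_guest_code, &l2_stack[128]);

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
	GUEST_DONE();
}

/* Host side: allocate the VMX pages and hand their GVA to L1. */
static void setup_l1(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_vaddr_t vmx_gva;

	vcpu_alloc_vmx(vm, &vmx_gva);
	vcpu_args_set(vcpu, 1, vmx_gva);
}
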
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
deleted file mode 100644 (file)
index 7abbf88..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) support
- */
-
-#include <errno.h>
-#include <linux/bits.h>
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-
-#include <gic.h>
-#include "gic_private.h"
-#include "processor.h"
-#include "spinlock.h"
-
-static const struct gic_common_ops *gic_common_ops;
-static struct spinlock gic_lock;
-
-static void gic_cpu_init(unsigned int cpu)
-{
-       gic_common_ops->gic_cpu_init(cpu);
-}
-
-static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
-{
-       const struct gic_common_ops *gic_ops = NULL;
-
-       spin_lock(&gic_lock);
-
-       /* Distributor initialization is needed only once per VM */
-       if (gic_common_ops) {
-               spin_unlock(&gic_lock);
-               return;
-       }
-
-       if (type == GIC_V3)
-               gic_ops = &gicv3_ops;
-
-       GUEST_ASSERT(gic_ops);
-
-       gic_ops->gic_init(nr_cpus);
-       gic_common_ops = gic_ops;
-
-       /* Make sure that the initialized data is visible to all the vCPUs */
-       dsb(sy);
-
-       spin_unlock(&gic_lock);
-}
-
-void gic_init(enum gic_type type, unsigned int nr_cpus)
-{
-       uint32_t cpu = guest_get_vcpuid();
-
-       GUEST_ASSERT(type < GIC_TYPE_MAX);
-       GUEST_ASSERT(nr_cpus);
-
-       gic_dist_init(type, nr_cpus);
-       gic_cpu_init(cpu);
-}
-
-void gic_irq_enable(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_enable(intid);
-}
-
-void gic_irq_disable(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_disable(intid);
-}
-
-unsigned int gic_get_and_ack_irq(void)
-{
-       uint64_t irqstat;
-       unsigned int intid;
-
-       GUEST_ASSERT(gic_common_ops);
-
-       irqstat = gic_common_ops->gic_read_iar();
-       intid = irqstat & GENMASK(23, 0);
-
-       return intid;
-}
-
-void gic_set_eoi(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_write_eoir(intid);
-}
-
-void gic_set_dir(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_write_dir(intid);
-}
-
-void gic_set_eoi_split(bool split)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_eoi_split(split);
-}
-
-void gic_set_priority_mask(uint64_t pmr)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_priority_mask(pmr);
-}
-
-void gic_set_priority(unsigned int intid, unsigned int prio)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_set_priority(intid, prio);
-}
-
-void gic_irq_set_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_active(intid);
-}
-
-void gic_irq_clear_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_clear_active(intid);
-}
-
-bool gic_irq_get_active(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       return gic_common_ops->gic_irq_get_active(intid);
-}
-
-void gic_irq_set_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_pending(intid);
-}
-
-void gic_irq_clear_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_clear_pending(intid);
-}
-
-bool gic_irq_get_pending(unsigned int intid)
-{
-       GUEST_ASSERT(gic_common_ops);
-       return gic_common_ops->gic_irq_get_pending(intid);
-}
-
-void gic_irq_set_config(unsigned int intid, bool is_edge)
-{
-       GUEST_ASSERT(gic_common_ops);
-       gic_common_ops->gic_irq_set_config(intid, is_edge);
-}
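From the guest's point of view the wrapper above is used as: gic_init() once per vCPU (the distributor setup inside it runs only once per VM), per-interrupt configuration, then ack/EOI from the IRQ handler. A rough sketch, assuming the exception vector and handler registration are done elsewhere by the test harness; the SPI number and function names are placeholders:

#include "gic.h"
#include "processor.h"

#define TEST_SPI_INTID	64	/* arbitrary SPI, for illustration only */

static void guest_irq_handler(struct ex_regs *regs)
{
	unsigned int intid = gic_get_and_ack_irq();

	/* ... per-test handling of 'intid' goes here ... */

	gic_set_eoi(intid);
}

static void guest_code(unsigned int nr_vcpus)
{
	gic_init(GIC_V3, nr_vcpus);

	gic_set_priority(TEST_SPI_INTID, 0);
	gic_irq_enable(TEST_SPI_INTID);
	local_irq_enable();

	/* Wait for the host (or another vCPU) to inject the SPI. */
	wfi();
}
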
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
deleted file mode 100644 (file)
index d24e9ec..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ARM Generic Interrupt Controller (GIC) private defines that's only
- * shared among the GIC library code.
- */
-
-#ifndef SELFTEST_KVM_GIC_PRIVATE_H
-#define SELFTEST_KVM_GIC_PRIVATE_H
-
-struct gic_common_ops {
-       void (*gic_init)(unsigned int nr_cpus);
-       void (*gic_cpu_init)(unsigned int cpu);
-       void (*gic_irq_enable)(unsigned int intid);
-       void (*gic_irq_disable)(unsigned int intid);
-       uint64_t (*gic_read_iar)(void);
-       void (*gic_write_eoir)(uint32_t irq);
-       void (*gic_write_dir)(uint32_t irq);
-       void (*gic_set_eoi_split)(bool split);
-       void (*gic_set_priority_mask)(uint64_t mask);
-       void (*gic_set_priority)(uint32_t intid, uint32_t prio);
-       void (*gic_irq_set_active)(uint32_t intid);
-       void (*gic_irq_clear_active)(uint32_t intid);
-       bool (*gic_irq_get_active)(uint32_t intid);
-       void (*gic_irq_set_pending)(uint32_t intid);
-       void (*gic_irq_clear_pending)(uint32_t intid);
-       bool (*gic_irq_get_pending)(uint32_t intid);
-       void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
-};
-
-extern const struct gic_common_ops gicv3_ops;
-
-#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
deleted file mode 100644 (file)
index 66d0550..0000000
+++ /dev/null
@@ -1,427 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 support
- */
-
-#include <linux/sizes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "delay.h"
-
-#include "gic.h"
-#include "gic_v3.h"
-#include "gic_private.h"
-
-#define GICV3_MAX_CPUS                 512
-
-#define GICD_INT_DEF_PRI               0xa0
-#define GICD_INT_DEF_PRI_X4            ((GICD_INT_DEF_PRI << 24) |\
-                                       (GICD_INT_DEF_PRI << 16) |\
-                                       (GICD_INT_DEF_PRI << 8) |\
-                                       GICD_INT_DEF_PRI)
-
-#define ICC_PMR_DEF_PRIO               0xf0
-
-struct gicv3_data {
-       unsigned int nr_cpus;
-       unsigned int nr_spis;
-};
-
-#define sgi_base_from_redist(redist_base)      (redist_base + SZ_64K)
-#define DIST_BIT                               (1U << 31)
-
-enum gicv3_intid_range {
-       SGI_RANGE,
-       PPI_RANGE,
-       SPI_RANGE,
-       INVALID_RANGE,
-};
-
-static struct gicv3_data gicv3_data;
-
-static void gicv3_gicd_wait_for_rwp(void)
-{
-       unsigned int count = 100000; /* 1s */
-
-       while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static inline volatile void *gicr_base_cpu(uint32_t cpu)
-{
-       /* Align all the redistributors sequentially */
-       return GICR_BASE_GVA + cpu * SZ_64K * 2;
-}
-
-static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
-{
-       unsigned int count = 100000; /* 1s */
-
-       while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
-{
-       if (cpu_or_dist & DIST_BIT)
-               gicv3_gicd_wait_for_rwp();
-       else
-               gicv3_gicr_wait_for_rwp(cpu_or_dist);
-}
-
-static enum gicv3_intid_range get_intid_range(unsigned int intid)
-{
-       switch (intid) {
-       case 0 ... 15:
-               return SGI_RANGE;
-       case 16 ... 31:
-               return PPI_RANGE;
-       case 32 ... 1019:
-               return SPI_RANGE;
-       }
-
-       /* We should not be reaching here */
-       GUEST_ASSERT(0);
-
-       return INVALID_RANGE;
-}
-
-static uint64_t gicv3_read_iar(void)
-{
-       uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
-
-       dsb(sy);
-       return irqstat;
-}
-
-static void gicv3_write_eoir(uint32_t irq)
-{
-       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
-       isb();
-}
-
-static void gicv3_write_dir(uint32_t irq)
-{
-       write_sysreg_s(irq, SYS_ICC_DIR_EL1);
-       isb();
-}
-
-static void gicv3_set_priority_mask(uint64_t mask)
-{
-       write_sysreg_s(mask, SYS_ICC_PMR_EL1);
-}
-
-static void gicv3_set_eoi_split(bool split)
-{
-       uint32_t val;
-
-       /*
-        * All other fields are read-only, so no need to read CTLR first. In
-        * fact, the kernel does the same.
-        */
-       val = split ? (1U << 1) : 0;
-       write_sysreg_s(val, SYS_ICC_CTLR_EL1);
-       isb();
-}
-
-uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
-{
-       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
-                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
-       return readl(base + offset);
-}
-
-void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
-{
-       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
-                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
-       writel(reg_val, base + offset);
-}
-
-uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
-{
-       return gicv3_reg_readl(cpu_or_dist, offset) & mask;
-}
-
-void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
-               uint32_t mask, uint32_t reg_val)
-{
-       uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
-
-       tmp |= (reg_val & mask);
-       gicv3_reg_writel(cpu_or_dist, offset, tmp);
-}
-
-/*
- * We use a single offset for the distributor and redistributor maps as they
- * have the same value in both. The only exceptions are registers that only
- * exist in one and not the other, like GICR_WAKER that doesn't exist in the
- * distributor map. Such registers are conveniently marked as reserved in the
- * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
- * marked as "Reserved" in the Distributor map.
- */
-static void gicv3_access_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field,
-               bool write, uint32_t *val)
-{
-       uint32_t cpu = guest_get_vcpuid();
-       enum gicv3_intid_range intid_range = get_intid_range(intid);
-       uint32_t fields_per_reg, index, mask, shift;
-       uint32_t cpu_or_dist;
-
-       GUEST_ASSERT(bits_per_field <= reg_bits);
-       GUEST_ASSERT(!write || *val < (1U << bits_per_field));
-       /*
-        * This function does not support 64 bit accesses. Just asserting here
-        * until we implement readq/writeq.
-        */
-       GUEST_ASSERT(reg_bits == 32);
-
-       fields_per_reg = reg_bits / bits_per_field;
-       index = intid % fields_per_reg;
-       shift = index * bits_per_field;
-       mask = ((1U << bits_per_field) - 1) << shift;
-
-       /* Set offset to the actual register holding intid's config. */
-       offset += (intid / fields_per_reg) * (reg_bits / 8);
-
-       cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
-
-       if (write)
-               gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
-       *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
-}
-
-static void gicv3_write_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
-{
-       gicv3_access_reg(intid, offset, reg_bits,
-                       bits_per_field, true, &val);
-}
-
-static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
-               uint32_t reg_bits, uint32_t bits_per_field)
-{
-       uint32_t val;
-
-       gicv3_access_reg(intid, offset, reg_bits,
-                       bits_per_field, false, &val);
-       return val;
-}
-
-static void gicv3_set_priority(uint32_t intid, uint32_t prio)
-{
-       gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
-}
-
-/* Sets the intid to be level-sensitive or edge-triggered. */
-static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
-{
-       uint32_t val;
-
-       /* N/A for private interrupts. */
-       GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
-       val = is_edge ? 2 : 0;
-       gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
-}
-
-static void gicv3_irq_enable(uint32_t intid)
-{
-       bool is_spi = get_intid_range(intid) == SPI_RANGE;
-       uint32_t cpu = guest_get_vcpuid();
-
-       gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
-       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_disable(uint32_t intid)
-{
-       bool is_spi = get_intid_range(intid) == SPI_RANGE;
-       uint32_t cpu = guest_get_vcpuid();
-
-       gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
-       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
-}
-
-static void gicv3_irq_set_active(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_active(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_active(uint32_t intid)
-{
-       return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
-}
-
-static void gicv3_irq_set_pending(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
-}
-
-static void gicv3_irq_clear_pending(uint32_t intid)
-{
-       gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
-}
-
-static bool gicv3_irq_get_pending(uint32_t intid)
-{
-       return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
-}
-
-static void gicv3_enable_redist(volatile void *redist_base)
-{
-       uint32_t val = readl(redist_base + GICR_WAKER);
-       unsigned int count = 100000; /* 1s */
-
-       val &= ~GICR_WAKER_ProcessorSleep;
-       writel(val, redist_base + GICR_WAKER);
-
-       /* Wait until the processor is 'active' */
-       while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
-               GUEST_ASSERT(count--);
-               udelay(10);
-       }
-}
-
-static void gicv3_cpu_init(unsigned int cpu)
-{
-       volatile void *sgi_base;
-       unsigned int i;
-       volatile void *redist_base_cpu;
-
-       GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
-       redist_base_cpu = gicr_base_cpu(cpu);
-       sgi_base = sgi_base_from_redist(redist_base_cpu);
-
-       gicv3_enable_redist(redist_base_cpu);
-
-       /*
-        * Mark all the SGI and PPI interrupts as non-secure Group-1.
-        * Also, deactivate and disable them.
-        */
-       writel(~0, sgi_base + GICR_IGROUPR0);
-       writel(~0, sgi_base + GICR_ICACTIVER0);
-       writel(~0, sgi_base + GICR_ICENABLER0);
-
-       /* Set a default priority for all the SGIs and PPIs */
-       for (i = 0; i < 32; i += 4)
-               writel(GICD_INT_DEF_PRI_X4,
-                               sgi_base + GICR_IPRIORITYR0 + i);
-
-       gicv3_gicr_wait_for_rwp(cpu);
-
-       /* Enable the GIC system register (ICC_*) access */
-       write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
-                       SYS_ICC_SRE_EL1);
-
-       /* Set a default priority threshold */
-       write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
-
-       /* Enable non-secure Group-1 interrupts */
-       write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
-}
-
-static void gicv3_dist_init(void)
-{
-       unsigned int i;
-
-       /* Disable the distributor until we set things up */
-       writel(0, GICD_BASE_GVA + GICD_CTLR);
-       gicv3_gicd_wait_for_rwp();
-
-       /*
-        * Mark all the SPI interrupts as non-secure Group-1.
-        * Also, deactivate and disable them.
-        */
-       for (i = 32; i < gicv3_data.nr_spis; i += 32) {
-               writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
-               writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
-               writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
-       }
-
-       /* Set a default priority for all the SPIs */
-       for (i = 32; i < gicv3_data.nr_spis; i += 4)
-               writel(GICD_INT_DEF_PRI_X4,
-                               GICD_BASE_GVA + GICD_IPRIORITYR + i);
-
-       /* Wait for the settings to sync-in */
-       gicv3_gicd_wait_for_rwp();
-
-       /* Finally, enable the distributor globally with ARE */
-       writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
-                       GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
-       gicv3_gicd_wait_for_rwp();
-}
-
-static void gicv3_init(unsigned int nr_cpus)
-{
-       GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
-
-       gicv3_data.nr_cpus = nr_cpus;
-       gicv3_data.nr_spis = GICD_TYPER_SPIS(
-                               readl(GICD_BASE_GVA + GICD_TYPER));
-       if (gicv3_data.nr_spis > 1020)
-               gicv3_data.nr_spis = 1020;
-
-       /*
-        * Initialize only the distributor for now.
-        * The redistributor and CPU interfaces are initialized
-        * later for every PE.
-        */
-       gicv3_dist_init();
-}
-
-const struct gic_common_ops gicv3_ops = {
-       .gic_init = gicv3_init,
-       .gic_cpu_init = gicv3_cpu_init,
-       .gic_irq_enable = gicv3_irq_enable,
-       .gic_irq_disable = gicv3_irq_disable,
-       .gic_read_iar = gicv3_read_iar,
-       .gic_write_eoir = gicv3_write_eoir,
-       .gic_write_dir = gicv3_write_dir,
-       .gic_set_priority_mask = gicv3_set_priority_mask,
-       .gic_set_eoi_split = gicv3_set_eoi_split,
-       .gic_set_priority = gicv3_set_priority,
-       .gic_irq_set_active = gicv3_irq_set_active,
-       .gic_irq_clear_active = gicv3_irq_clear_active,
-       .gic_irq_get_active = gicv3_irq_get_active,
-       .gic_irq_set_pending = gicv3_irq_set_pending,
-       .gic_irq_clear_pending = gicv3_irq_clear_pending,
-       .gic_irq_get_pending = gicv3_irq_get_pending,
-       .gic_irq_set_config = gicv3_irq_set_config,
-};
-
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
-                          vm_paddr_t pend_table)
-{
-       volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
-
-       u32 ctlr;
-       u64 val;
-
-       val = (cfg_table |
-              GICR_PROPBASER_InnerShareable |
-              GICR_PROPBASER_RaWaWb |
-              ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
-       writeq_relaxed(val, rdist_base + GICR_PROPBASER);
-
-       val = (pend_table |
-              GICR_PENDBASER_InnerShareable |
-              GICR_PENDBASER_RaWaWb);
-       writeq_relaxed(val, rdist_base + GICR_PENDBASER);
-
-       ctlr = readl_relaxed(rdist_base + GICR_CTLR);
-       ctlr |= GICR_CTLR_ENABLE_LPIS;
-       writel_relaxed(ctlr, rdist_base + GICR_CTLR);
-}
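As a concrete example of the field addressing that gicv3_access_reg() performs for the accessors above:

/*
 * Worked example (illustrative only) for an 8-bits-per-field register
 * such as GICD_IPRIORITYR, i.e. gicv3_set_priority(66, prio) with
 * reg_bits = 32 and bits_per_field = 8:
 *
 *   fields_per_reg = 32 / 8       = 4
 *   index          = 66 % 4       = 2
 *   shift          = 2 * 8        = 16
 *   mask           = 0xff << 16   = 0x00ff0000
 *   offset        += (66 / 4) * 4   (i.e. GICD_IPRIORITYR + 0x40)
 *
 * INTID 66 is an SPI, so the access targets the distributor (DIST_BIT);
 * an SGI or PPI would instead target the calling vCPU's redistributor
 * SGI frame via sgi_base_from_redist().
 */
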
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
deleted file mode 100644 (file)
index 09f2705..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
- * over in the kernel tree.
- */
-
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-#include "processor.h"
-
-static u64 its_read_u64(unsigned long offset)
-{
-       return readq_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u64(unsigned long offset, u64 val)
-{
-       writeq_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static u32 its_read_u32(unsigned long offset)
-{
-       return readl_relaxed(GITS_BASE_GVA + offset);
-}
-
-static void its_write_u32(unsigned long offset, u32 val)
-{
-       writel_relaxed(val, GITS_BASE_GVA + offset);
-}
-
-static unsigned long its_find_baser(unsigned int type)
-{
-       int i;
-
-       for (i = 0; i < GITS_BASER_NR_REGS; i++) {
-               u64 baser;
-               unsigned long offset = GITS_BASER + (i * sizeof(baser));
-
-               baser = its_read_u64(offset);
-               if (GITS_BASER_TYPE(baser) == type)
-                       return offset;
-       }
-
-       GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
-       return -1;
-}
-
-static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
-{
-       unsigned long offset = its_find_baser(type);
-       u64 baser;
-
-       baser = ((size / SZ_64K) - 1) |
-               GITS_BASER_PAGE_SIZE_64K |
-               GITS_BASER_InnerShareable |
-               base |
-               GITS_BASER_RaWaWb |
-               GITS_BASER_VALID;
-
-       its_write_u64(offset, baser);
-}
-
-static void its_install_cmdq(vm_paddr_t base, size_t size)
-{
-       u64 cbaser;
-
-       cbaser = ((size / SZ_4K) - 1) |
-                GITS_CBASER_InnerShareable |
-                base |
-                GITS_CBASER_RaWaWb |
-                GITS_CBASER_VALID;
-
-       its_write_u64(GITS_CBASER, cbaser);
-}
-
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
-             vm_paddr_t device_tbl, size_t device_tbl_sz,
-             vm_paddr_t cmdq, size_t cmdq_size)
-{
-       u32 ctlr;
-
-       its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
-       its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
-       its_install_cmdq(cmdq, cmdq_size);
-
-       ctlr = its_read_u32(GITS_CTLR);
-       ctlr |= GITS_CTLR_ENABLE;
-       its_write_u32(GITS_CTLR, ctlr);
-}
-
-struct its_cmd_block {
-       union {
-               u64     raw_cmd[4];
-               __le64  raw_cmd_le[4];
-       };
-};
-
-static inline void its_fixup_cmd(struct its_cmd_block *cmd)
-{
-       /* Let's fixup BE commands */
-       cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
-       cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
-       cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
-       cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
-}
-
-static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
-{
-       u64 mask = GENMASK_ULL(h, l);
-       *raw_cmd &= ~mask;
-       *raw_cmd |= (val << l) & mask;
-}
-
-static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
-{
-       its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
-}
-
-static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
-{
-       its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
-}
-
-static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
-{
-       its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
-}
-
-static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
-{
-       its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
-}
-
-static void its_encode_size(struct its_cmd_block *cmd, u8 size)
-{
-       its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
-}
-
-static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
-{
-       its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
-}
-
-static void its_encode_valid(struct its_cmd_block *cmd, int valid)
-{
-       its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
-}
-
-static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
-{
-       its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
-}
-
-static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
-{
-       its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
-}
-
-#define GITS_CMDQ_POLL_ITERATIONS      0
-
-static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
-{
-       u64 cwriter = its_read_u64(GITS_CWRITER);
-       struct its_cmd_block *dst = cmdq_base + cwriter;
-       u64 cbaser = its_read_u64(GITS_CBASER);
-       size_t cmdq_size;
-       u64 next;
-       int i;
-
-       cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
-
-       its_fixup_cmd(cmd);
-
-       WRITE_ONCE(*dst, *cmd);
-       dsb(ishst);
-       next = (cwriter + sizeof(*cmd)) % cmdq_size;
-       its_write_u64(GITS_CWRITER, next);
-
-       /*
-        * Polling isn't necessary considering KVM's ITS emulation at the time
-        * of writing this, as the CMDQ is processed synchronously after a write
-        * to CWRITER.
-        */
-       for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
-               __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
-                              "ITS didn't process command at offset %lu after %d iterations\n",
-                              cwriter, i);
-
-               cpu_relax();
-       }
-}
-
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
-                      size_t itt_size, bool valid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPD);
-       its_encode_devid(&cmd, device_id);
-       its_encode_size(&cmd, ilog2(itt_size) - 1);
-       its_encode_itt(&cmd, itt_base);
-       its_encode_valid(&cmd, valid);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPC);
-       its_encode_collection(&cmd, collection_id);
-       its_encode_target(&cmd, vcpu_id);
-       its_encode_valid(&cmd, valid);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
-                       u32 collection_id, u32 intid)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_MAPTI);
-       its_encode_devid(&cmd, device_id);
-       its_encode_event_id(&cmd, event_id);
-       its_encode_phys_id(&cmd, intid);
-       its_encode_collection(&cmd, collection_id);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
-
-void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
-{
-       struct its_cmd_block cmd = {};
-
-       its_encode_cmd(&cmd, GITS_CMD_INVALL);
-       its_encode_collection(&cmd, collection_id);
-
-       its_send_cmd(cmdq_base, &cmd);
-}
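The command builders above are normally issued in a fixed order to route one LPI: MAPD for the device, MAPC for the collection, MAPTI to bind the event to the LPI and collection, then INVALL. A hedged sketch, assuming the ITS tables, ITT and command queue were already allocated and gic_rdist_enable_lpis() was called on the target vCPU; all IDs are placeholders:

/* 8192 is simply the first LPI INTID; everything else is arbitrary. */
static void guest_map_one_lpi(void *cmdq_base, vm_paddr_t itt_base)
{
	const u32 device_id = 0, event_id = 0, coll_id = 0, intid = 8192;

	/* Bind the device to its Interrupt Translation Table. */
	its_send_mapd_cmd(cmdq_base, device_id, itt_base, SZ_4K, true);

	/* Map the collection onto the target vCPU's redistributor. */
	its_send_mapc_cmd(cmdq_base, /*vcpu_id=*/0, coll_id, true);

	/* Translate (device_id, event_id) to the LPI and collection. */
	its_send_mapti_cmd(cmdq_base, device_id, event_id, coll_id, intid);

	/* Resynchronize any cached configuration for the collection. */
	its_send_invall_cmd(cmdq_base, coll_id);
}
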
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
deleted file mode 100644 (file)
index 0e443ea..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-.macro save_registers
-       add     sp, sp, #-16 * 17
-
-       stp     x0, x1, [sp, #16 * 0]
-       stp     x2, x3, [sp, #16 * 1]
-       stp     x4, x5, [sp, #16 * 2]
-       stp     x6, x7, [sp, #16 * 3]
-       stp     x8, x9, [sp, #16 * 4]
-       stp     x10, x11, [sp, #16 * 5]
-       stp     x12, x13, [sp, #16 * 6]
-       stp     x14, x15, [sp, #16 * 7]
-       stp     x16, x17, [sp, #16 * 8]
-       stp     x18, x19, [sp, #16 * 9]
-       stp     x20, x21, [sp, #16 * 10]
-       stp     x22, x23, [sp, #16 * 11]
-       stp     x24, x25, [sp, #16 * 12]
-       stp     x26, x27, [sp, #16 * 13]
-       stp     x28, x29, [sp, #16 * 14]
-
-       /*
-        * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
-        * at it. It will _not_ be used to restore the sp on return from the
-        * exception so handlers can not update it.
-        */
-       add     x1, sp, #16 * 17
-       stp     x30, x1, [sp, #16 * 15] /* x30, SP */
-
-       mrs     x1, elr_el1
-       mrs     x2, spsr_el1
-       stp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
-.endm
-
-.macro restore_registers
-       ldp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
-       msr     elr_el1, x1
-       msr     spsr_el1, x2
-
-       /* sp is not restored */
-       ldp     x30, xzr, [sp, #16 * 15] /* x30, SP */
-
-       ldp     x28, x29, [sp, #16 * 14]
-       ldp     x26, x27, [sp, #16 * 13]
-       ldp     x24, x25, [sp, #16 * 12]
-       ldp     x22, x23, [sp, #16 * 11]
-       ldp     x20, x21, [sp, #16 * 10]
-       ldp     x18, x19, [sp, #16 * 9]
-       ldp     x16, x17, [sp, #16 * 8]
-       ldp     x14, x15, [sp, #16 * 7]
-       ldp     x12, x13, [sp, #16 * 6]
-       ldp     x10, x11, [sp, #16 * 5]
-       ldp     x8, x9, [sp, #16 * 4]
-       ldp     x6, x7, [sp, #16 * 3]
-       ldp     x4, x5, [sp, #16 * 2]
-       ldp     x2, x3, [sp, #16 * 1]
-       ldp     x0, x1, [sp, #16 * 0]
-
-       add     sp, sp, #16 * 17
-
-       eret
-.endm
-
-.pushsection ".entry.text", "ax"
-.balign 0x800
-.global vectors
-vectors:
-.popsection
-
-.set   vector, 0
-
-/*
- * Build an exception handler for vector and append a jump to it into
- * vectors (while making sure that it's 0x80 aligned).
- */
-.macro HANDLER, label
-handler_\label:
-       save_registers
-       mov     x0, sp
-       mov     x1, #vector
-       bl      route_exception
-       restore_registers
-
-.pushsection ".entry.text", "ax"
-.balign 0x80
-       b       handler_\label
-.popsection
-
-.set   vector, vector + 1
-.endm
-
-.macro HANDLER_INVALID
-.pushsection ".entry.text", "ax"
-.balign 0x80
-/* This will abort so no need to save and restore registers. */
-       mov     x0, #vector
-       mov     x1, #0 /* ec */
-       mov     x2, #0 /* valid_ec */
-       b       kvm_exit_unexpected_exception
-.popsection
-
-.set   vector, vector + 1
-.endm
-
-/*
- * Caution: be sure to not add anything between the declaration of vectors
- * above and these macro calls that will build the vectors table below it.
- */
-       HANDLER_INVALID                         // Synchronous EL1t
-       HANDLER_INVALID                         // IRQ EL1t
-       HANDLER_INVALID                         // FIQ EL1t
-       HANDLER_INVALID                         // Error EL1t
-
-       HANDLER el1h_sync                       // Synchronous EL1h
-       HANDLER el1h_irq                        // IRQ EL1h
-       HANDLER el1h_fiq                        // FIQ EL1h
-       HANDLER el1h_error                      // Error EL1h
-
-       HANDLER el0_sync_64                     // Synchronous 64-bit EL0
-       HANDLER el0_irq_64                      // IRQ 64-bit EL0
-       HANDLER el0_fiq_64                      // FIQ 64-bit EL0
-       HANDLER el0_error_64                    // Error 64-bit EL0
-
-       HANDLER el0_sync_32                     // Synchronous 32-bit EL0
-       HANDLER el0_irq_32                      // IRQ 32-bit EL0
-       HANDLER el0_fiq_32                      // FIQ 32-bit EL0
-       HANDLER el0_error_32                    // Error 32-bit EL0
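Each stub built by the HANDLER macro saves the frame described by save_registers and branches to route_exception(), which the library resolves to a per-vector handler registered by the test. A hedged sketch of that C side, assuming the selftests' registration helpers (handler body and function names are illustrative):

#include "kvm_util.h"
#include "processor.h"

/* Hypothetical handler: skip the faulting BRK instruction and resume. */
static void guest_brk_handler(struct ex_regs *regs)
{
	regs->pc += 4;
}

static void host_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	/* Route 64-bit BRK exceptions taken at EL1h to the handler above. */
	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
				ESR_ELx_EC_BRK64, guest_brk_handler);
}
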
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
deleted file mode 100644 (file)
index 7ba3aa3..0000000
+++ /dev/null
@@ -1,647 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AArch64 code
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-
-#include <linux/compiler.h>
-#include <assert.h>
-
-#include "guest_modes.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "ucall_common.h"
-
-#include <linux/bitfield.h>
-#include <linux/sizes.h>
-
-#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN    0xac0000
-
-static vm_vaddr_t exception_handlers;
-
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
-       return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
-       TEST_ASSERT(vm->pgtable_levels == 4,
-               "Mode %d does not have 4 page table levels", vm->mode);
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-
-       TEST_ASSERT(vm->pgtable_levels >= 3,
-               "Mode %d does not have >= 3 page table levels", vm->mode);
-
-       return (gva >> shift) & mask;
-}
-
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
-       return (gva >> vm->page_shift) & mask;
-}
-
-static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
-{
-       return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
-           (vm->pa_bits > 48 || vm->va_bits > 48);
-}
-
-static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
-{
-       uint64_t pte;
-
-       if (use_lpa2_pte_format(vm)) {
-               pte = pa & GENMASK(49, vm->page_shift);
-               pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
-               attrs &= ~GENMASK(9, 8);
-       } else {
-               pte = pa & GENMASK(47, vm->page_shift);
-               if (vm->page_shift == 16)
-                       pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
-       }
-       pte |= attrs;
-
-       return pte;
-}
-
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
-{
-       uint64_t pa;
-
-       if (use_lpa2_pte_format(vm)) {
-               pa = pte & GENMASK(49, vm->page_shift);
-               pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
-       } else {
-               pa = pte & GENMASK(47, vm->page_shift);
-               if (vm->page_shift == 16)
-                       pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
-       }
-
-       return pa;
-}
-
-static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
-{
-       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
-       return 1 << (vm->va_bits - shift);
-}
-
-static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
-{
-       return 1 << (vm->page_shift - 3);
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
-
-       if (vm->pgd_created)
-               return;
-
-       vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
-                                    KVM_GUEST_PAGE_TABLE_MIN_PADDR,
-                                    vm->memslots[MEM_REGION_PT]);
-       vm->pgd_created = true;
-}
-
-static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                        uint64_t flags)
-{
-       uint8_t attr_idx = flags & 7;
-       uint64_t *ptep;
-
-       TEST_ASSERT((vaddr % vm->page_size) == 0,
-               "Virtual address not on page boundary,\n"
-               "  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (vaddr >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx", vaddr);
-       TEST_ASSERT((paddr % vm->page_size) == 0,
-               "Physical address not on page boundary,\n"
-               "  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-               "Physical address beyond maximum supported,\n"
-               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-               paddr, vm->max_gfn, vm->page_size);
-
-       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
-       if (!*ptep)
-               *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-
-       switch (vm->pgtable_levels) {
-       case 4:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
-               if (!*ptep)
-                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-               /* fall through */
-       case 3:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
-               if (!*ptep)
-                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
-               /* fall through */
-       case 2:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
-               break;
-       default:
-               TEST_FAIL("Page table levels must be 2, 3, or 4");
-       }
-
-       *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3);  /* AF */
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
-       uint64_t attr_idx = MT_NORMAL;
-
-       _virt_pg_map(vm, vaddr, paddr, attr_idx);
-}
-
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t *ptep;
-
-       if (!vm->pgd_created)
-               goto unmapped_gva;
-
-       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
-       if (!ptep)
-               goto unmapped_gva;
-
-       switch (vm->pgtable_levels) {
-       case 4:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               /* fall through */
-       case 3:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               /* fall through */
-       case 2:
-               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
-               if (!ptep)
-                       goto unmapped_gva;
-               break;
-       default:
-               TEST_FAIL("Page table levels must be 2, 3, or 4");
-       }
-
-       return ptep;
-
-unmapped_gva:
-       TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-       exit(EXIT_FAILURE);
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       uint64_t *ptep = virt_get_pte_hva(vm, gva);
-
-       return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
-}
-
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
-{
-#ifdef DEBUG
-       static const char * const type[] = { "", "pud", "pmd", "pte" };
-       uint64_t pte, *ptep;
-
-       if (level == 4)
-               return;
-
-       for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
-               ptep = addr_gpa2hva(vm, pte);
-               if (!*ptep)
-                       continue;
-               fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
-               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
-       }
-#endif
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       int level = 4 - (vm->pgtable_levels - 1);
-       uint64_t pgd, *ptep;
-
-       if (!vm->pgd_created)
-               return;
-
-       for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
-               ptep = addr_gpa2hva(vm, pgd);
-               if (!*ptep)
-                       continue;
-               fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
-               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
-       }
-}
-
-void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
-{
-       struct kvm_vcpu_init default_init = { .target = -1, };
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
-
-       if (!init)
-               init = &default_init;
-
-       if (init->target == -1) {
-               struct kvm_vcpu_init preferred;
-               vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
-               init->target = preferred.target;
-       }
-
-       vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
-
-       /*
-        * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
-        * registers, which the variable argument list macros do.
-        */
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
-
-       sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
-       tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
-
-       /* Configure base granule size */
-       switch (vm->mode) {
-       case VM_MODE_PXXV48_4K:
-               TEST_FAIL("AArch64 does not support 4K sized pages "
-                         "with ANY-bit physical address ranges");
-       case VM_MODE_P52V48_64K:
-       case VM_MODE_P48V48_64K:
-       case VM_MODE_P40V48_64K:
-       case VM_MODE_P36V48_64K:
-               tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
-               break;
-       case VM_MODE_P52V48_16K:
-       case VM_MODE_P48V48_16K:
-       case VM_MODE_P40V48_16K:
-       case VM_MODE_P36V48_16K:
-       case VM_MODE_P36V47_16K:
-               tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
-               break;
-       case VM_MODE_P52V48_4K:
-       case VM_MODE_P48V48_4K:
-       case VM_MODE_P40V48_4K:
-       case VM_MODE_P36V48_4K:
-               tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
-               break;
-       default:
-               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
-       }
-
-       ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
-
-       /* Configure output size */
-       switch (vm->mode) {
-       case VM_MODE_P52V48_4K:
-       case VM_MODE_P52V48_16K:
-       case VM_MODE_P52V48_64K:
-               tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
-               ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
-               break;
-       case VM_MODE_P48V48_4K:
-       case VM_MODE_P48V48_16K:
-       case VM_MODE_P48V48_64K:
-               tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
-               break;
-       case VM_MODE_P40V48_4K:
-       case VM_MODE_P40V48_16K:
-       case VM_MODE_P40V48_64K:
-               tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
-               break;
-       case VM_MODE_P36V48_4K:
-       case VM_MODE_P36V48_16K:
-       case VM_MODE_P36V48_64K:
-       case VM_MODE_P36V47_16K:
-               tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
-               break;
-       default:
-               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
-       }
-
-       sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
-       /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
-       tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
-       tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
-       if (use_lpa2_pte_format(vm))
-               tcr_el1 |= (1ul << 59) /* DS */;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       uint64_t pstate, pc;
-
-       pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
-       pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
-
-       fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
-               indent, "", pstate, pc);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
-}
-
-static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                          struct kvm_vcpu_init *init)
-{
-       size_t stack_size;
-       uint64_t stack_vaddr;
-       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
-                                            vm->page_size;
-       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
-                                      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       aarch64_vcpu_setup(vcpu, init);
-
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
-       return vcpu;
-}
-
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code)
-{
-       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
-
-       vcpu_arch_set_entry_point(vcpu, guest_code);
-
-       return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       int i;
-
-       TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
-                   "  num: %u", num);
-
-       va_start(ap, num);
-
-       for (i = 0; i < num; i++) {
-               vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
-                            va_arg(ap, uint64_t));
-       }
-
-       va_end(ap);
-}
-
-void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
-{
-       ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
-       while (1)
-               ;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
-               return;
-
-       if (uc.args[2]) /* valid_ec */ {
-               assert(VECTOR_IS_SYNC(uc.args[0]));
-               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
-                         uc.args[0], uc.args[1]);
-       } else {
-               assert(!VECTOR_IS_SYNC(uc.args[0]));
-               TEST_FAIL("Unexpected exception (vector:0x%lx)",
-                         uc.args[0]);
-       }
-}
-
-struct handlers {
-       handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
-};
-
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
-{
-       extern char vectors;
-
-       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
-}
-
-void route_exception(struct ex_regs *regs, int vector)
-{
-       struct handlers *handlers = (struct handlers *)exception_handlers;
-       bool valid_ec;
-       int ec = 0;
-
-       switch (vector) {
-       case VECTOR_SYNC_CURRENT:
-       case VECTOR_SYNC_LOWER_64:
-               ec = ESR_ELx_EC(read_sysreg(esr_el1));
-               valid_ec = true;
-               break;
-       case VECTOR_IRQ_CURRENT:
-       case VECTOR_IRQ_LOWER_64:
-       case VECTOR_FIQ_CURRENT:
-       case VECTOR_FIQ_LOWER_64:
-       case VECTOR_ERROR_CURRENT:
-       case VECTOR_ERROR_LOWER_64:
-               ec = 0;
-               valid_ec = false;
-               break;
-       default:
-               valid_ec = false;
-               goto unexpected_exception;
-       }
-
-       if (handlers && handlers->exception_handlers[vector][ec])
-               return handlers->exception_handlers[vector][ec](regs);
-
-unexpected_exception:
-       kvm_exit_unexpected_exception(vector, ec, valid_ec);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
-       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
-                                       vm->page_size, MEM_REGION_DATA);
-
-       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
-                        void (*handler)(struct ex_regs *))
-{
-       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
-       assert(VECTOR_IS_SYNC(vector));
-       assert(vector < VECTOR_NUM);
-       assert(ec <= ESR_ELx_EC_MAX);
-       handlers->exception_handlers[vector][ec] = handler;
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                        void (*handler)(struct ex_regs *))
-{
-       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
-
-       assert(!VECTOR_IS_SYNC(vector));
-       assert(vector < VECTOR_NUM);
-       handlers->exception_handlers[vector][0] = handler;
-}
-
-uint32_t guest_get_vcpuid(void)
-{
-       return read_sysreg(tpidr_el1);
-}
-
-static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
-                               uint32_t not_sup_val, uint32_t ipa52_min_val)
-{
-       if (gran == not_sup_val)
-               return 0;
-       else if (gran >= ipa52_min_val && vm_ipa >= 52)
-               return 52;
-       else
-               return min(vm_ipa, 48U);
-}
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
-                                       uint32_t *ipa16k, uint32_t *ipa64k)
-{
-       struct kvm_vcpu_init preferred_init;
-       int kvm_fd, vm_fd, vcpu_fd, err;
-       uint64_t val;
-       uint32_t gran;
-       struct kvm_one_reg reg = {
-               .id     = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
-               .addr   = (uint64_t)&val,
-       };
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
-       TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
-
-       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
-       TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
-
-       err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
-       err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
-
-       err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
-       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
-       *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
-       *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN64_IMP);
-
-       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
-       *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
-                                       ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
-
-       close(vcpu_fd);
-       close(vm_fd);
-       close(kvm_fd);
-}
-
-#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5,    \
-                    arg6, res)                                                 \
-       asm volatile("mov   w0, %w[function_id]\n"                              \
-                    "mov   x1, %[arg0]\n"                                      \
-                    "mov   x2, %[arg1]\n"                                      \
-                    "mov   x3, %[arg2]\n"                                      \
-                    "mov   x4, %[arg3]\n"                                      \
-                    "mov   x5, %[arg4]\n"                                      \
-                    "mov   x6, %[arg5]\n"                                      \
-                    "mov   x7, %[arg6]\n"                                      \
-                    #insn  "#0\n"                                              \
-                    "mov   %[res0], x0\n"                                      \
-                    "mov   %[res1], x1\n"                                      \
-                    "mov   %[res2], x2\n"                                      \
-                    "mov   %[res3], x3\n"                                      \
-                    : [res0] "=r"(res->a0), [res1] "=r"(res->a1),              \
-                      [res2] "=r"(res->a2), [res3] "=r"(res->a3)               \
-                    : [function_id] "r"(function_id), [arg0] "r"(arg0),        \
-                      [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),    \
-                      [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)     \
-                    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
-
-
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res)
-{
-       __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
-                    arg6, res);
-}
-
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
-              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
-              uint64_t arg6, struct arm_smccc_res *res)
-{
-       __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
-                    arg6, res);
-}
-
-void kvm_selftest_arch_init(void)
-{
-       /*
-        * arm64 doesn't have a true default mode, so start by computing the
-        * available IPA space and page sizes early.
-        */
-       guest_modes_append_default();
-}
-
-void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
-{
-       /*
-        * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
-        * is [0, 2^(64 - TCR_EL1.T0SZ)).
-        */
-       sparsebit_set_num(vm->vpages_valid, 0,
-                         (1ULL << vm->va_bits) >> vm->page_shift);
-}
-
-/* Helper to call wfi instruction. */
-void wfi(void)
-{
-       asm volatile("wfi");
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
deleted file mode 100644 (file)
index a076e78..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 Spinlock support
- */
-#include <stdint.h>
-
-#include "spinlock.h"
-
-void spin_lock(struct spinlock *lock)
-{
-       int val, res;
-
-       asm volatile(
-       "1:     ldaxr   %w0, [%2]\n"
-       "       cbnz    %w0, 1b\n"
-       "       mov     %w0, #1\n"
-       "       stxr    %w1, %w0, [%2]\n"
-       "       cbnz    %w1, 1b\n"
-       : "=&r" (val), "=&r" (res)
-       : "r" (&lock->v)
-       : "memory");
-}
-
-void spin_unlock(struct spinlock *lock)
-{
-       asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
deleted file mode 100644 (file)
index ddab0ce..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-vm_vaddr_t *ucall_exit_mmio_addr;
-
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-       vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
-
-       virt_map(vm, mmio_gva, mmio_gpa, 1);
-
-       vm->ucall_mmio_addr = mmio_gpa;
-
-       write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_MMIO &&
-           run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
-               TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
-                           "Unexpected ucall exit mmio address access");
-               return (void *)(*((uint64_t *)run->mmio.data));
-       }
-
-       return NULL;
-}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
deleted file mode 100644 (file)
index 4427f43..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM Generic Interrupt Controller (GIC) v3 host support
- */
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <asm/cputype.h>
-#include <asm/kvm_para.h>
-#include <asm/kvm.h>
-
-#include "kvm_util.h"
-#include "vgic.h"
-#include "gic.h"
-#include "gic_v3.h"
-
-/*
- * vGIC-v3 default host setup
- *
- * Input args:
- *     vm - KVM VM
- *     nr_vcpus - Number of vCPUs supported by this VM
- *
- * Output args: None
- *
- * Return: GIC file-descriptor or negative error code upon failure
- *
- * The function creates a vGIC-v3 device and maps the distributor and
- * redistributor regions of the guest. Since it depends on the number of
- * vCPUs for the VM, it must be called after all the vCPUs have been created.
- */
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
-{
-       int gic_fd;
-       uint64_t attr;
-       struct list_head *iter;
-       unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
-       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
-       /*
-        * Make sure that the caller is in fact calling this
-        * function after all the vCPUs are added.
-        */
-       list_for_each(iter, &vm->vcpus)
-               nr_vcpus_created++;
-       TEST_ASSERT(nr_vcpus == nr_vcpus_created,
-                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
-                       nr_vcpus, nr_vcpus_created);
-
-       /* Distributor setup */
-       gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
-       if (gic_fd < 0)
-               return gic_fd;
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       attr = GICD_BASE_GPA;
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
-       nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
-       virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
-
-       /* Redistributor setup */
-       attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
-       nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
-                                               KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
-       virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
-
-       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       return gic_fd;
-}
-
-/* should only work for level sensitive interrupts */
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
-       uint64_t attr = 32 * (intid / 32);
-       uint64_t index = intid % 32;
-       uint64_t val;
-       int ret;
-
-       ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
-                                   attr, &val);
-       if (ret != 0)
-               return ret;
-
-       val |= 1U << index;
-       ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
-                                   attr, &val);
-       return ret;
-}
-
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
-{
-       int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
-
-       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
-}
-
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
-       uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
-
-       TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
-               "doesn't allow injecting SGIs. There's no mask for it.");
-
-       if (INTID_IS_PPI(intid))
-               irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
-       else
-               irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
-
-       return _kvm_irq_line(vm, irq, level);
-}
-
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
-{
-       int ret = _kvm_arm_irq_line(vm, intid, level);
-
-       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
-}
-
-static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
-                         uint64_t reg_off)
-{
-       uint64_t reg = intid / 32;
-       uint64_t index = intid % 32;
-       uint64_t attr = reg_off + reg * 4;
-       uint64_t val;
-       bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
-
-       uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
-                                         : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
-
-       if (intid_is_private) {
-               /* TODO: only vcpu 0 implemented for now. */
-               assert(vcpu->id == 0);
-               attr += SZ_64K;
-       }
-
-       /* Check that the addr part of the attr is within 32 bits. */
-       assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
-
-       /*
-        * All calls will succeed, even with invalid intid's, as long as the
-        * addr part of the attr is within 32 bits (checked above). An invalid
-        * intid will just make the read/writes point to above the intended
-        * register space (i.e., ICPENDR after ISPENDR).
-        */
-       kvm_device_attr_get(gic_fd, group, attr, &val);
-       val |= 1ULL << index;
-       kvm_device_attr_set(gic_fd, group, attr, &val);
-}
-
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
-       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
-}
-
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
-{
-       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
-}
-
-int vgic_its_setup(struct kvm_vm *vm)
-{
-       int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
-       u64 attr;
-
-       attr = GITS_BASE_GPA;
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-                           KVM_VGIC_ITS_ADDR_TYPE, &attr);
-
-       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
-                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
-       virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
-                vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
-
-       return its_fd;
-}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
new file mode 100644 (file)
index 0000000..7abbf88
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu)
+{
+       gic_common_ops->gic_cpu_init(cpu);
+}
+
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
+{
+       const struct gic_common_ops *gic_ops = NULL;
+
+       spin_lock(&gic_lock);
+
+       /* Distributor initialization is needed only once per VM */
+       if (gic_common_ops) {
+               spin_unlock(&gic_lock);
+               return;
+       }
+
+       if (type == GIC_V3)
+               gic_ops = &gicv3_ops;
+
+       GUEST_ASSERT(gic_ops);
+
+       gic_ops->gic_init(nr_cpus);
+       gic_common_ops = gic_ops;
+
+       /* Make sure that the initialized data is visible to all the vCPUs */
+       dsb(sy);
+
+       spin_unlock(&gic_lock);
+}
+
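+/*
+ * Meant to be called by every vCPU: the distributor is initialized only once
+ * (serialized by gic_lock), while the CPU interface is set up for each caller.
+ */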
+void gic_init(enum gic_type type, unsigned int nr_cpus)
+{
+       uint32_t cpu = guest_get_vcpuid();
+
+       GUEST_ASSERT(type < GIC_TYPE_MAX);
+       GUEST_ASSERT(nr_cpus);
+
+       gic_dist_init(type, nr_cpus);
+       gic_cpu_init(cpu);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+       uint64_t irqstat;
+       unsigned int intid;
+
+       GUEST_ASSERT(gic_common_ops);
+
+       irqstat = gic_common_ops->gic_read_iar();
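+       /* The INTID of the acknowledged interrupt is in bits [23:0] of the IAR value. */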
+       intid = irqstat & GENMASK(23, 0);
+
+       return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+       GUEST_ASSERT(gic_common_ops);
+       return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+       GUEST_ASSERT(gic_common_ops);
+       gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
new file mode 100644 (file)
index 0000000..d24e9ec
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that are only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+       void (*gic_init)(unsigned int nr_cpus);
+       void (*gic_cpu_init)(unsigned int cpu);
+       void (*gic_irq_enable)(unsigned int intid);
+       void (*gic_irq_disable)(unsigned int intid);
+       uint64_t (*gic_read_iar)(void);
+       void (*gic_write_eoir)(uint32_t irq);
+       void (*gic_write_dir)(uint32_t irq);
+       void (*gic_set_eoi_split)(bool split);
+       void (*gic_set_priority_mask)(uint64_t mask);
+       void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+       void (*gic_irq_set_active)(uint32_t intid);
+       void (*gic_irq_clear_active)(uint32_t intid);
+       bool (*gic_irq_get_active)(uint32_t intid);
+       void (*gic_irq_set_pending)(uint32_t intid);
+       void (*gic_irq_clear_pending)(uint32_t intid);
+       bool (*gic_irq_get_pending)(uint32_t intid);
+       void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
new file mode 100644 (file)
index 0000000..66d0550
--- /dev/null
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_private.h"
+
+#define GICV3_MAX_CPUS                 512
+
+#define GICD_INT_DEF_PRI               0xa0
+#define GICD_INT_DEF_PRI_X4            ((GICD_INT_DEF_PRI << 24) |\
+                                       (GICD_INT_DEF_PRI << 16) |\
+                                       (GICD_INT_DEF_PRI << 8) |\
+                                       GICD_INT_DEF_PRI)
+
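+/*
+ * GIC priorities are "lower value == higher priority", so a priority mask of
+ * 0xf0 leaves interrupts at the default priority of 0xa0 unmasked.
+ */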
+#define ICC_PMR_DEF_PRIO               0xf0
+
+struct gicv3_data {
+       unsigned int nr_cpus;
+       unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base)      (redist_base + SZ_64K)
+#define DIST_BIT                               (1U << 31)
+
+enum gicv3_intid_range {
+       SGI_RANGE,
+       PPI_RANGE,
+       SPI_RANGE,
+       INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+       unsigned int count = 100000; /* 1s */
+
+       while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+       /* Align all the redistributors sequentially */
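+       /* Each redistributor occupies two 64K frames: RD_base followed by the SGI frame. */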
+       return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
+{
+       unsigned int count = 100000; /* 1s */
+
+       while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+       if (cpu_or_dist & DIST_BIT)
+               gicv3_gicd_wait_for_rwp();
+       else
+               gicv3_gicr_wait_for_rwp(cpu_or_dist);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+       switch (intid) {
+       case 0 ... 15:
+               return SGI_RANGE;
+       case 16 ... 31:
+               return PPI_RANGE;
+       case 32 ... 1019:
+               return SPI_RANGE;
+       }
+
+       /* We should not be reaching here */
+       GUEST_ASSERT(0);
+
+       return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+       uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+       dsb(sy);
+       return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+       write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+       isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+       write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+       isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+       write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+       uint32_t val;
+
+       /*
+        * All other fields are read-only, so no need to read CTLR first. In
+        * fact, the kernel does the same.
+        */
+       val = split ? (1U << 1) : 0;
+       write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+       isb();
+}
+
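+/*
+ * The accessors below take a "cpu_or_dist" token: DIST_BIT selects the
+ * distributor, otherwise the value is treated as a vCPU id and the access
+ * targets that vCPU's redistributor SGI frame.
+ */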
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+       return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+       volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+                       : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
+       writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+       return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+               uint32_t mask, uint32_t reg_val)
+{
+       uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+       tmp |= (reg_val & mask);
+       gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field,
+               bool write, uint32_t *val)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       enum gicv3_intid_range intid_range = get_intid_range(intid);
+       uint32_t fields_per_reg, index, mask, shift;
+       uint32_t cpu_or_dist;
+
+       GUEST_ASSERT(bits_per_field <= reg_bits);
+       GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+       /*
+        * This function does not support 64 bit accesses. Just asserting here
+        * until we implement readq/writeq.
+        */
+       GUEST_ASSERT(reg_bits == 32);
+
+       fields_per_reg = reg_bits / bits_per_field;
+       index = intid % fields_per_reg;
+       shift = index * bits_per_field;
+       mask = ((1U << bits_per_field) - 1) << shift;
+
+       /* Set offset to the actual register holding intid's config. */
+       offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+       cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+       if (write)
+               gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+       *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+       gicv3_access_reg(intid, offset, reg_bits,
+                       bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+               uint32_t reg_bits, uint32_t bits_per_field)
+{
+       uint32_t val;
+
+       gicv3_access_reg(intid, offset, reg_bits,
+                       bits_per_field, false, &val);
+       return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+       gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+       uint32_t val;
+
+       /* N/A for private interrupts. */
+       GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+       val = is_edge ? 2 : 0;
+       gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+       bool is_spi = get_intid_range(intid) == SPI_RANGE;
+       uint32_t cpu = guest_get_vcpuid();
+
+       gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+       bool is_spi = get_intid_range(intid) == SPI_RANGE;
+       uint32_t cpu = guest_get_vcpuid();
+
+       gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+       gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+       return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+       gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+       return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(volatile void *redist_base)
+{
+       uint32_t val = readl(redist_base + GICR_WAKER);
+       unsigned int count = 100000; /* 1s */
+
+       val &= ~GICR_WAKER_ProcessorSleep;
+       writel(val, redist_base + GICR_WAKER);
+
+       /* Wait until the processor is 'active' */
+       while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+               GUEST_ASSERT(count--);
+               udelay(10);
+       }
+}
+
+static void gicv3_cpu_init(unsigned int cpu)
+{
+       volatile void *sgi_base;
+       unsigned int i;
+       volatile void *redist_base_cpu;
+
+       GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+       redist_base_cpu = gicr_base_cpu(cpu);
+       sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+       gicv3_enable_redist(redist_base_cpu);
+
+       /*
+        * Mark all the SGI and PPI interrupts as non-secure Group-1.
+        * Also, deactivate and disable them.
+        */
+       writel(~0, sgi_base + GICR_IGROUPR0);
+       writel(~0, sgi_base + GICR_ICACTIVER0);
+       writel(~0, sgi_base + GICR_ICENABLER0);
+
+       /* Set a default priority for all the SGIs and PPIs */
+       for (i = 0; i < 32; i += 4)
+               writel(GICD_INT_DEF_PRI_X4,
+                               sgi_base + GICR_IPRIORITYR0 + i);
+
+       gicv3_gicr_wait_for_rwp(cpu);
+
+       /* Enable the GIC system register (ICC_*) access */
+       write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+                       SYS_ICC_SRE_EL1);
+
+       /* Set a default priority threshold */
+       write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+       /* Enable non-secure Group-1 interrupts */
+       write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
+}
+
+static void gicv3_dist_init(void)
+{
+       unsigned int i;
+
+       /* Disable the distributor until we set things up */
+       writel(0, GICD_BASE_GVA + GICD_CTLR);
+       gicv3_gicd_wait_for_rwp();
+
+       /*
+        * Mark all the SPI interrupts as non-secure Group-1.
+        * Also, deactivate and disable them.
+        */
+       for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+               writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+               writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+               writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
+       }
+
+       /* Set a default priority for all the SPIs */
+       for (i = 32; i < gicv3_data.nr_spis; i += 4)
+               writel(GICD_INT_DEF_PRI_X4,
+                               GICD_BASE_GVA + GICD_IPRIORITYR + i);
+
+       /* Wait for the settings to sync-in */
+       gicv3_gicd_wait_for_rwp();
+
+       /* Finally, enable the distributor globally with ARE */
+       writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+                       GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
+       gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus)
+{
+       GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+       gicv3_data.nr_cpus = nr_cpus;
+       gicv3_data.nr_spis = GICD_TYPER_SPIS(
+                               readl(GICD_BASE_GVA + GICD_TYPER));
+       if (gicv3_data.nr_spis > 1020)
+               gicv3_data.nr_spis = 1020;
+
+       /*
+        * Initialize only the distributor for now.
+        * The redistributor and CPU interfaces are initialized
+        * later for every PE.
+        */
+       gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+       .gic_init = gicv3_init,
+       .gic_cpu_init = gicv3_cpu_init,
+       .gic_irq_enable = gicv3_irq_enable,
+       .gic_irq_disable = gicv3_irq_disable,
+       .gic_read_iar = gicv3_read_iar,
+       .gic_write_eoir = gicv3_write_eoir,
+       .gic_write_dir = gicv3_write_dir,
+       .gic_set_priority_mask = gicv3_set_priority_mask,
+       .gic_set_eoi_split = gicv3_set_eoi_split,
+       .gic_set_priority = gicv3_set_priority,
+       .gic_irq_set_active = gicv3_irq_set_active,
+       .gic_irq_clear_active = gicv3_irq_clear_active,
+       .gic_irq_get_active = gicv3_irq_get_active,
+       .gic_irq_set_pending = gicv3_irq_set_pending,
+       .gic_irq_clear_pending = gicv3_irq_clear_pending,
+       .gic_irq_get_pending = gicv3_irq_get_pending,
+       .gic_irq_set_config = gicv3_irq_set_config,
+};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+                          vm_paddr_t pend_table)
+{
+       volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+       u32 ctlr;
+       u64 val;
+
+       val = (cfg_table |
+              GICR_PROPBASER_InnerShareable |
+              GICR_PROPBASER_RaWaWb |
+              ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+       writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+       val = (pend_table |
+              GICR_PENDBASER_InnerShareable |
+              GICR_PENDBASER_RaWaWb);
+       writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+       ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+       ctlr |= GICR_CTLR_ENABLE_LPIS;
+       writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
new file mode 100644 (file)
index 0000000..09f2705
--- /dev/null
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+static u64 its_read_u64(unsigned long offset)
+{
+       return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+       writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+       return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+       writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+       int i;
+
+       for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+               u64 baser;
+               unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+               baser = its_read_u64(offset);
+               if (GITS_BASER_TYPE(baser) == type)
+                       return offset;
+       }
+
+       GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+       return -1;
+}
+
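+/*
+ * GITS_BASER and GITS_CBASER encode the table size in their low bits as a
+ * number of pages minus one; the physical base address and the
+ * cacheability/shareability attributes are OR'd into the same 64-bit value.
+ */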
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+       unsigned long offset = its_find_baser(type);
+       u64 baser;
+
+       baser = ((size / SZ_64K) - 1) |
+               GITS_BASER_PAGE_SIZE_64K |
+               GITS_BASER_InnerShareable |
+               base |
+               GITS_BASER_RaWaWb |
+               GITS_BASER_VALID;
+
+       its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+       u64 cbaser;
+
+       cbaser = ((size / SZ_4K) - 1) |
+                GITS_CBASER_InnerShareable |
+                base |
+                GITS_CBASER_RaWaWb |
+                GITS_CBASER_VALID;
+
+       its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+             vm_paddr_t device_tbl, size_t device_tbl_sz,
+             vm_paddr_t cmdq, size_t cmdq_size)
+{
+       u32 ctlr;
+
+       its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+       its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+       its_install_cmdq(cmdq, cmdq_size);
+
+       ctlr = its_read_u32(GITS_CTLR);
+       ctlr |= GITS_CTLR_ENABLE;
+       its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+       union {
+               u64     raw_cmd[4];
+               __le64  raw_cmd_le[4];
+       };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+       /* Let's fixup BE commands */
+       cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+       cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+       cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+       cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+       u64 mask = GENMASK_ULL(h, l);
+       *raw_cmd &= ~mask;
+       *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+       its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+       its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+       its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+       its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+       its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+       its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+       its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+       its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+       its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS      0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+       u64 cwriter = its_read_u64(GITS_CWRITER);
+       struct its_cmd_block *dst = cmdq_base + cwriter;
+       u64 cbaser = its_read_u64(GITS_CBASER);
+       size_t cmdq_size;
+       u64 next;
+       int i;
+
+       cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+       its_fixup_cmd(cmd);
+
+       WRITE_ONCE(*dst, *cmd);
+       dsb(ishst);
+       next = (cwriter + sizeof(*cmd)) % cmdq_size;
+       its_write_u64(GITS_CWRITER, next);
+
+       /*
+        * Polling isn't necessary considering KVM's ITS emulation at the time
+        * of writing this, as the CMDQ is processed synchronously after a write
+        * to CWRITER.
+        */
+       for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+               __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+                              "ITS didn't process command at offset %lu after %d iterations\n",
+                              cwriter, i);
+
+               cpu_relax();
+       }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+                      size_t itt_size, bool valid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPD);
+       its_encode_devid(&cmd, device_id);
+       its_encode_size(&cmd, ilog2(itt_size) - 1);
+       its_encode_itt(&cmd, itt_base);
+       its_encode_valid(&cmd, valid);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPC);
+       its_encode_collection(&cmd, collection_id);
+       its_encode_target(&cmd, vcpu_id);
+       its_encode_valid(&cmd, valid);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+                       u32 collection_id, u32 intid)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+       its_encode_devid(&cmd, device_id);
+       its_encode_event_id(&cmd, event_id);
+       its_encode_phys_id(&cmd, intid);
+       its_encode_collection(&cmd, collection_id);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+       struct its_cmd_block cmd = {};
+
+       its_encode_cmd(&cmd, GITS_CMD_INVALL);
+       its_encode_collection(&cmd, collection_id);
+
+       its_send_cmd(cmdq_base, &cmd);
+}
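As a rough illustration of how the helpers above compose, here is a minimal guest-side sketch of the usual ITS programming flow; the device/event/collection IDs, the LPI number (8192 is the first architectural LPI), and the caller that supplies cmdq_base and the ITT are assumptions for illustration only:

        static void guest_its_map_lpi_example(void *cmdq_base, vm_paddr_t itt_base,
                                              size_t itt_size)
        {
                const u32 device_id = 0, event_id = 0, collection_id = 0, intid = 8192;

                /* Bind the device to its Interrupt Translation Table. */
                its_send_mapd_cmd(cmdq_base, device_id, itt_base, itt_size, true);

                /* Map the collection to vCPU 0. */
                its_send_mapc_cmd(cmdq_base, 0, collection_id, true);

                /* Route (device_id, event_id) to the LPI via the collection. */
                its_send_mapti_cmd(cmdq_base, device_id, event_id, collection_id, intid);

                /* Invalidate any cached configuration for the collection. */
                its_send_invall_cmd(cmdq_base, collection_id);
        }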
diff --git a/tools/testing/selftests/kvm/lib/arm64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S
new file mode 100644 (file)
index 0000000..0e443ea
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+       add     sp, sp, #-16 * 17
+
+       stp     x0, x1, [sp, #16 * 0]
+       stp     x2, x3, [sp, #16 * 1]
+       stp     x4, x5, [sp, #16 * 2]
+       stp     x6, x7, [sp, #16 * 3]
+       stp     x8, x9, [sp, #16 * 4]
+       stp     x10, x11, [sp, #16 * 5]
+       stp     x12, x13, [sp, #16 * 6]
+       stp     x14, x15, [sp, #16 * 7]
+       stp     x16, x17, [sp, #16 * 8]
+       stp     x18, x19, [sp, #16 * 9]
+       stp     x20, x21, [sp, #16 * 10]
+       stp     x22, x23, [sp, #16 * 11]
+       stp     x24, x25, [sp, #16 * 12]
+       stp     x26, x27, [sp, #16 * 13]
+       stp     x28, x29, [sp, #16 * 14]
+
+       /*
+        * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+        * at it. It will _not_ be used to restore the sp on return from the
+        * exception so handlers can not update it.
+        */
+       add     x1, sp, #16 * 17
+       stp     x30, x1, [sp, #16 * 15] /* x30, SP */
+
+       mrs     x1, elr_el1
+       mrs     x2, spsr_el1
+       stp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+       ldp     x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+       msr     elr_el1, x1
+       msr     spsr_el1, x2
+
+       /* sp is not restored */
+       ldp     x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+       ldp     x28, x29, [sp, #16 * 14]
+       ldp     x26, x27, [sp, #16 * 13]
+       ldp     x24, x25, [sp, #16 * 12]
+       ldp     x22, x23, [sp, #16 * 11]
+       ldp     x20, x21, [sp, #16 * 10]
+       ldp     x18, x19, [sp, #16 * 9]
+       ldp     x16, x17, [sp, #16 * 8]
+       ldp     x14, x15, [sp, #16 * 7]
+       ldp     x12, x13, [sp, #16 * 6]
+       ldp     x10, x11, [sp, #16 * 5]
+       ldp     x8, x9, [sp, #16 * 4]
+       ldp     x6, x7, [sp, #16 * 3]
+       ldp     x4, x5, [sp, #16 * 2]
+       ldp     x2, x3, [sp, #16 * 1]
+       ldp     x0, x1, [sp, #16 * 0]
+
+       add     sp, sp, #16 * 17
+
+       eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set   vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+       save_registers
+       mov     x0, sp
+       mov     x1, #vector
+       bl      route_exception
+       restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+       b       handler_\label
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+       mov     x0, #vector
+       mov     x1, #0 /* ec */
+       mov     x2, #0 /* valid_ec */
+       b       kvm_exit_unexpected_exception
+.popsection
+
+.set   vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+       HANDLER_INVALID                         // Synchronous EL1t
+       HANDLER_INVALID                         // IRQ EL1t
+       HANDLER_INVALID                         // FIQ EL1t
+       HANDLER_INVALID                         // Error EL1t
+
+       HANDLER el1h_sync                       // Synchronous EL1h
+       HANDLER el1h_irq                        // IRQ EL1h
+       HANDLER el1h_fiq                        // FIQ EL1h
+       HANDLER el1h_error                      // Error EL1h
+
+       HANDLER el0_sync_64                     // Synchronous 64-bit EL0
+       HANDLER el0_irq_64                      // IRQ 64-bit EL0
+       HANDLER el0_fiq_64                      // FIQ 64-bit EL0
+       HANDLER el0_error_64                    // Error 64-bit EL0
+
+       HANDLER el0_sync_32                     // Synchronous 32-bit EL0
+       HANDLER el0_irq_32                      // IRQ 32-bit EL0
+       HANDLER el0_fiq_32                      // FIQ 32-bit EL0
+       HANDLER el0_error_32                    // Error 32-bit EL0
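For orientation, the macros above emit the standard 16-entry AArch64 vector table, one 0x80-byte stub per entry; a sketch of the resulting offsets relative to VBAR_EL1, assuming the architectural layout:

        /*
         * 0x000-0x180  Current EL with SP_EL0 (EL1t)  - all invalid, jump to kvm_exit_unexpected_exception
         * 0x200-0x380  Current EL with SP_ELx (EL1h)  - sync/irq/fiq/error -> route_exception
         * 0x400-0x580  Lower EL, AArch64 (64-bit EL0) - sync/irq/fiq/error -> route_exception
         * 0x600-0x780  Lower EL, AArch32 (32-bit EL0) - sync/irq/fiq/error -> route_exception
         */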
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
new file mode 100644 (file)
index 0000000..7ba3aa3
--- /dev/null
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "guest_modes.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
+
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN    0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+       return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+       TEST_ASSERT(vm->pgtable_levels == 4,
+               "Mode %d does not have 4 page table levels", vm->mode);
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+       TEST_ASSERT(vm->pgtable_levels >= 3,
+               "Mode %d does not have >= 3 page table levels", vm->mode);
+
+       return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+       return (gva >> vm->page_shift) & mask;
+}
+
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+       return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+           (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+       uint64_t pte;
+
+       if (use_lpa2_pte_format(vm)) {
+               pte = pa & GENMASK(49, vm->page_shift);
+               pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
+               attrs &= ~GENMASK(9, 8);
+       } else {
+               pte = pa & GENMASK(47, vm->page_shift);
+               if (vm->page_shift == 16)
+                       pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+       }
+       pte |= attrs;
+
+       return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+       uint64_t pa;
+
+       if (use_lpa2_pte_format(vm)) {
+               pa = pte & GENMASK(49, vm->page_shift);
+               pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+       } else {
+               pa = pte & GENMASK(47, vm->page_shift);
+               if (vm->page_shift == 16)
+                       pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+       }
+
+       return pa;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+       unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+       return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+{
+       return 1 << (vm->page_shift - 3);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+       if (vm->pgd_created)
+               return;
+
+       vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+                                    KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                    vm->memslots[MEM_REGION_PT]);
+       vm->pgd_created = true;
+}
+
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                        uint64_t flags)
+{
+       uint8_t attr_idx = flags & 7;
+       uint64_t *ptep;
+
+       TEST_ASSERT((vaddr % vm->page_size) == 0,
+               "Virtual address not on page boundary,\n"
+               "  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (vaddr >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx", vaddr);
+       TEST_ASSERT((paddr % vm->page_size) == 0,
+               "Physical address not on page boundary,\n"
+               "  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+               "Physical address beyond maximum supported,\n"
+               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+               paddr, vm->max_gfn, vm->page_size);
+
+       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+       if (!*ptep)
+               *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+
+       switch (vm->pgtable_levels) {
+       case 4:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+               if (!*ptep)
+                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+               /* fall through */
+       case 3:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+               if (!*ptep)
+                       *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+               /* fall through */
+       case 2:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+               break;
+       default:
+               TEST_FAIL("Page table levels must be 2, 3, or 4");
+       }
+
+       *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3);  /* AF */
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+       uint64_t attr_idx = MT_NORMAL;
+
+       _virt_pg_map(vm, vaddr, paddr, attr_idx);
+}
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t *ptep;
+
+       if (!vm->pgd_created)
+               goto unmapped_gva;
+
+       ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+       if (!ptep)
+               goto unmapped_gva;
+
+       switch (vm->pgtable_levels) {
+       case 4:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               /* fall through */
+       case 3:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               /* fall through */
+       case 2:
+               ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+               if (!ptep)
+                       goto unmapped_gva;
+               break;
+       default:
+               TEST_FAIL("Page table levels must be 2, 3, or 4");
+       }
+
+       return ptep;
+
+unmapped_gva:
+       TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+       exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+       return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG
+       static const char * const type[] = { "", "pud", "pmd", "pte" };
+       uint64_t pte, *ptep;
+
+       if (level == 4)
+               return;
+
+       for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+               ptep = addr_gpa2hva(vm, pte);
+               if (!*ptep)
+                       continue;
+               fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+       }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       int level = 4 - (vm->pgtable_levels - 1);
+       uint64_t pgd, *ptep;
+
+       if (!vm->pgd_created)
+               return;
+
+       for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+               ptep = addr_gpa2hva(vm, pgd);
+               if (!*ptep)
+                       continue;
+               fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+               pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+       }
+}
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
+{
+       struct kvm_vcpu_init default_init = { .target = -1, };
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
+
+       if (!init)
+               init = &default_init;
+
+       if (init->target == -1) {
+               struct kvm_vcpu_init preferred;
+               vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+               init->target = preferred.target;
+       }
+
+       vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+
+       /*
+        * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+        * registers, which the variable argument list macros do.
+        */
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+
+       sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
+       tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+
+       /* Configure base granule size */
+       switch (vm->mode) {
+       case VM_MODE_PXXV48_4K:
+               TEST_FAIL("AArch64 does not support 4K sized pages "
+                         "with ANY-bit physical address ranges");
+       case VM_MODE_P52V48_64K:
+       case VM_MODE_P48V48_64K:
+       case VM_MODE_P40V48_64K:
+       case VM_MODE_P36V48_64K:
+               tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+               break;
+       case VM_MODE_P52V48_16K:
+       case VM_MODE_P48V48_16K:
+       case VM_MODE_P40V48_16K:
+       case VM_MODE_P36V48_16K:
+       case VM_MODE_P36V47_16K:
+               tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+               break;
+       case VM_MODE_P52V48_4K:
+       case VM_MODE_P48V48_4K:
+       case VM_MODE_P40V48_4K:
+       case VM_MODE_P36V48_4K:
+               tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+               break;
+       default:
+               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+       }
+
+       ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+       /* Configure output size */
+       switch (vm->mode) {
+       case VM_MODE_P52V48_4K:
+       case VM_MODE_P52V48_16K:
+       case VM_MODE_P52V48_64K:
+               tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+               ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+               break;
+       case VM_MODE_P48V48_4K:
+       case VM_MODE_P48V48_16K:
+       case VM_MODE_P48V48_64K:
+               tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+               break;
+       case VM_MODE_P40V48_4K:
+       case VM_MODE_P40V48_16K:
+       case VM_MODE_P40V48_64K:
+               tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+               break;
+       case VM_MODE_P36V48_4K:
+       case VM_MODE_P36V48_16K:
+       case VM_MODE_P36V48_64K:
+       case VM_MODE_P36V47_16K:
+               tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+               break;
+       default:
+               TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+       }
+
+       sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+       /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+       tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+       tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+       if (use_lpa2_pte_format(vm))
+               tcr_el1 |= (1ul << 59) /* DS */;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       uint64_t pstate, pc;
+
+       pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+       pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+
+       fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
+               indent, "", pstate, pc);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                          struct kvm_vcpu_init *init)
+{
+       size_t stack_size;
+       uint64_t stack_vaddr;
+       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+                                            vm->page_size;
+       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       aarch64_vcpu_setup(vcpu, init);
+
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+       return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
+
+       return vcpu;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       int i;
+
+       TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+                   "  num: %u", num);
+
+       va_start(ap, num);
+
+       for (i = 0; i < num; i++) {
+               vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+                            va_arg(ap, uint64_t));
+       }
+
+       va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+       ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+       while (1)
+               ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+               return;
+
+       if (uc.args[2]) /* valid_ec */ {
+               assert(VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+                         uc.args[0], uc.args[1]);
+       } else {
+               assert(!VECTOR_IS_SYNC(uc.args[0]));
+               TEST_FAIL("Unexpected exception (vector:0x%lx)",
+                         uc.args[0]);
+       }
+}
+
+struct handlers {
+       handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+       extern char vectors;
+
+       vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+       struct handlers *handlers = (struct handlers *)exception_handlers;
+       bool valid_ec;
+       int ec = 0;
+
+       switch (vector) {
+       case VECTOR_SYNC_CURRENT:
+       case VECTOR_SYNC_LOWER_64:
+               ec = ESR_ELx_EC(read_sysreg(esr_el1));
+               valid_ec = true;
+               break;
+       case VECTOR_IRQ_CURRENT:
+       case VECTOR_IRQ_LOWER_64:
+       case VECTOR_FIQ_CURRENT:
+       case VECTOR_FIQ_LOWER_64:
+       case VECTOR_ERROR_CURRENT:
+       case VECTOR_ERROR_LOWER_64:
+               ec = 0;
+               valid_ec = false;
+               break;
+       default:
+               valid_ec = false;
+               goto unexpected_exception;
+       }
+
+       if (handlers && handlers->exception_handlers[vector][ec])
+               return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+       kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+                                       vm->page_size, MEM_REGION_DATA);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       assert(ec <= ESR_ELx_EC_MAX);
+       handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                        void (*handler)(struct ex_regs *))
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(!VECTOR_IS_SYNC(vector));
+       assert(vector < VECTOR_NUM);
+       handlers->exception_handlers[vector][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+       return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+                               uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+       if (gran == not_sup_val)
+               return 0;
+       else if (gran >= ipa52_min_val && vm_ipa >= 52)
+               return 52;
+       else
+               return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+                                       uint32_t *ipa16k, uint32_t *ipa64k)
+{
+       struct kvm_vcpu_init preferred_init;
+       int kvm_fd, vm_fd, vcpu_fd, err;
+       uint64_t val;
+       uint32_t gran;
+       struct kvm_one_reg reg = {
+               .id     = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+               .addr   = (uint64_t)&val,
+       };
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+       TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+       TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+       err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+       err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+       err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+       TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, err));
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+       *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+       *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+       gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+       *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+                                       ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+       close(vcpu_fd);
+       close(vm_fd);
+       close(kvm_fd);
+}
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5,    \
+                    arg6, res)                                                 \
+       asm volatile("mov   w0, %w[function_id]\n"                              \
+                    "mov   x1, %[arg0]\n"                                      \
+                    "mov   x2, %[arg1]\n"                                      \
+                    "mov   x3, %[arg2]\n"                                      \
+                    "mov   x4, %[arg3]\n"                                      \
+                    "mov   x5, %[arg4]\n"                                      \
+                    "mov   x6, %[arg5]\n"                                      \
+                    "mov   x7, %[arg6]\n"                                      \
+                    #insn  "#0\n"                                              \
+                    "mov   %[res0], x0\n"                                      \
+                    "mov   %[res1], x1\n"                                      \
+                    "mov   %[res2], x2\n"                                      \
+                    "mov   %[res3], x3\n"                                      \
+                    : [res0] "=r"(res->a0), [res1] "=r"(res->a1),              \
+                      [res2] "=r"(res->a2), [res3] "=r"(res->a3)               \
+                    : [function_id] "r"(function_id), [arg0] "r"(arg0),        \
+                      [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),    \
+                      [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)     \
+                    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res)
+{
+       __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+                    arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+              uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+              uint64_t arg6, struct arm_smccc_res *res)
+{
+       __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+                    arg6, res);
+}
+
+void kvm_selftest_arch_init(void)
+{
+       /*
+        * arm64 doesn't have a true default mode, so start by computing the
+        * available IPA space and page sizes early.
+        */
+       guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+       /*
+        * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+        * is [0, 2^(64 - TCR_EL1.T0SZ)).
+        */
+       sparsebit_set_num(vm->vpages_valid, 0,
+                         (1ULL << vm->va_bits) >> vm->page_shift);
+}
+
+/* Helper to call wfi instruction. */
+void wfi(void)
+{
+       asm volatile("wfi");
+}
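A minimal host-side sketch of how a test wires together the pieces above (vCPU creation plus exception routing); guest_code, the choice of ESR_ELx_EC_BRK64, and stepping over the faulting instruction by 4 bytes are illustrative assumptions:

        static void guest_brk_handler(struct ex_regs *regs)
        {
                /* Step over the brk instruction and resume the guest. */
                regs->pc += 4;
        }

        static void example_setup(void)
        {
                struct kvm_vcpu *vcpu;
                struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);

                vm_init_descriptor_tables(vm);
                vcpu_init_descriptor_tables(vcpu);
                vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_BRK64,
                                        guest_brk_handler);
        }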
diff --git a/tools/testing/selftests/kvm/lib/arm64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
new file mode 100644 (file)
index 0000000..a076e78
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+       int val, res;
+
+       asm volatile(
+       "1:     ldaxr   %w0, [%2]\n"
+       "       cbnz    %w0, 1b\n"
+       "       mov     %w0, #1\n"
+       "       stxr    %w1, %w0, [%2]\n"
+       "       cbnz    %w1, 1b\n"
+       : "=&r" (val), "=&r" (res)
+       : "r" (&lock->v)
+       : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+       asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
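A small usage sketch, assuming a test-defined counter shared between guest vCPUs; the lock lives in guest memory and is zero-initialized:

        static struct spinlock counter_lock;
        static uint64_t shared_counter;

        static void guest_bump_counter(void)
        {
                spin_lock(&counter_lock);
                shared_counter++;
                spin_unlock(&counter_lock);
        }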
diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
new file mode 100644 (file)
index 0000000..ddab0ce
--- /dev/null
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+       vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+       virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+       vm->ucall_mmio_addr = mmio_gpa;
+
+       write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_MMIO &&
+           run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+               TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+                           "Unexpected ucall exit mmio address access");
+               return (void *)(*((uint64_t *)run->mmio.data));
+       }
+
+       return NULL;
+}
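On the host side, the MMIO exit decoded above typically surfaces through get_ucall(); a hedged sketch of the run loop tests commonly build on top of it (the surrounding test and its use of UCALL_SYNC/DONE/ABORT are assumptions):

        static void example_run_to_completion(struct kvm_vcpu *vcpu)
        {
                struct ucall uc;

                for (;;) {
                        vcpu_run(vcpu);

                        switch (get_ucall(vcpu, &uc)) {
                        case UCALL_SYNC:
                                break;          /* guest checkpoint, keep running */
                        case UCALL_DONE:
                                return;
                        case UCALL_ABORT:
                                REPORT_GUEST_ASSERT(uc);
                        default:
                                TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
                        }
                }
        }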
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
new file mode 100644 (file)
index 0000000..4427f43
--- /dev/null
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/cputype.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ *     vm - KVM VM
+ *     nr_vcpus - Number of vCPUs supported by this VM
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+       int gic_fd;
+       uint64_t attr;
+       struct list_head *iter;
+       unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+       TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be zero");
+
+       /*
+        * Make sure that the caller is in fact calling this
+        * function after all the vCPUs are added.
+        */
+       list_for_each(iter, &vm->vcpus)
+               nr_vcpus_created++;
+       TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+                       "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+                       nr_vcpus, nr_vcpus_created);
+
+       /* Distributor setup */
+       gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+       if (gic_fd < 0)
+               return gic_fd;
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       attr = GICD_BASE_GPA;
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
+       nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+       virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
+
+       /* Redistributor setup */
+       attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
+       nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+                                               KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+       virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
+
+       kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       return gic_fd;
+}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+       uint64_t attr = 32 * (intid / 32);
+       uint64_t index = intid % 32;
+       uint64_t val;
+       int ret;
+
+       ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                                   attr, &val);
+       if (ret != 0)
+               return ret;
+
+       val |= 1U << index;
+       ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+                                   attr, &val);
+       return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+       int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+       uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+       TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+               "doesn't allow injecting SGIs. There's no mask for it.");
+
+       if (INTID_IS_PPI(intid))
+               irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+       else
+               irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+       return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+       int ret = _kvm_arm_irq_line(vm, intid, level);
+
+       TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+                         uint64_t reg_off)
+{
+       uint64_t reg = intid / 32;
+       uint64_t index = intid % 32;
+       uint64_t attr = reg_off + reg * 4;
+       uint64_t val;
+       bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+       uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+                                         : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+       if (intid_is_private) {
+               /* TODO: only vcpu 0 implemented for now. */
+               assert(vcpu->id == 0);
+               attr += SZ_64K;
+       }
+
+       /* Check that the addr part of the attr is within 32 bits. */
+       assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+       /*
+        * All calls will succeed, even with invalid intid's, as long as the
+        * addr part of the attr is within 32 bits (checked above). An invalid
+        * intid will just make the read/writes point to above the intended
+        * register space (i.e., ICPENDR after ISPENDR).
+        */
+       kvm_device_attr_get(gic_fd, group, attr, &val);
+       val |= 1ULL << index;
+       kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+       vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+       int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+       u64 attr;
+
+       attr = GITS_BASE_GPA;
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+                           KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+       kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+                           KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+       virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+                vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+       return its_fd;
+}
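A hedged host-side sketch tying the setup helpers above together: the vGICv3 is created only after every vCPU exists, then an SPI is injected; the SPI number, IRQ count, and two-vCPU VM are assumptions for illustration:

        #define EXAMPLE_NR_VCPUS        2
        #define EXAMPLE_SPI             (32 + 5)        /* first SPI is INTID 32 */

        static void example_vgic_usage(struct kvm_vm *vm)
        {
                int gic_fd = vgic_v3_setup(vm, EXAMPLE_NR_VCPUS, 64);

                TEST_ASSERT(gic_fd >= 0, "Failed to create the vGICv3 device");

                /* Assert, then deassert, the example SPI line. */
                kvm_arm_irq_line(vm, EXAMPLE_SPI, 1);
                kvm_arm_irq_line(vm, EXAMPLE_SPI, 0);

                close(gic_fd);
        }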
diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
new file mode 100644 (file)
index 0000000..2c432fa
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define ICPT_INSTRUCTION       0x04
+#define IPA0_DIAG              0x8300
+
+static void guest_code(void)
+{
+       uint64_t diag318_info = 0x12345678;
+
+       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       uint64_t reg;
+       uint64_t diag318_info;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_run(vcpu);
+       run = vcpu->run;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+       diag318_info = run->s.regs.gprs[reg];
+
+       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+       kvm_vm_free(vm);
+
+       return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+       static uint64_t diag318_info;
+       static bool printed_skip;
+
+       /*
+        * If KVM does not support diag318, then return 0 to
+        * ensure tests do not break.
+        */
+       if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
+               if (!printed_skip) {
+                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+                               "Skipping diag318 test.\n");
+                       printed_skip = true;
+               }
+               return 0;
+       }
+
+       /*
+        * If a test has previously requested the diag318 info,
+        * then don't bother spinning up a temporary VM again.
+        */
+       if (!diag318_info)
+               diag318_info = diag318_handler();
+
+       return diag318_info;
+}
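As the comment above notes, callers decide what to do with the value; a rough sketch of the common pattern, mirroring it into the vCPU's sync regs, where the diag318 sync-reg field and the KVM_SYNC_DIAG318 flag are assumed to be available:

        static void example_use_diag318(struct kvm_vcpu *vcpu)
        {
                uint64_t diag318_info = get_diag318_info();

                /* Zero means KVM_CAP_S390_DIAG318 is unsupported; nothing to do. */
                if (!diag318_info)
                        return;

                vcpu->run->s.regs.diag318 = diag318_info;
                vcpu->run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
        }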
diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
new file mode 100644 (file)
index 0000000..d540812
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * Contains the definitions of the global variables for the test facility feature.
+ */
+
+#include "facility.h"
+
+uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
new file mode 100644 (file)
index 0000000..20cfe97
--- /dev/null
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#include "processor.h"
+#include "kvm_util.h"
+
+#define PAGES_PER_REGION 4
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       vm_paddr_t paddr;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       if (vm->pgd_created)
+               return;
+
+       paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                  vm->memslots[MEM_REGION_PT]);
+       memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+       vm->pgd = paddr;
+       vm->pgd_created = true;
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4). Returns a suitable region/segment table entry
+ * which points to the freshly allocated pages.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
+{
+       uint64_t taddr;
+
+       taddr = vm_phy_pages_alloc(vm,  ri < 4 ? PAGES_PER_REGION : 1,
+                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+       memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+       return (taddr & REGION_ENTRY_ORIGIN)
+               | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+               | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
+{
+       int ri, idx;
+       uint64_t *entry;
+
+       TEST_ASSERT((gva % vm->page_size) == 0,
+               "Virtual address not on page boundary,\n"
+               "  vaddr: 0x%lx vm->page_size: 0x%x",
+               gva, vm->page_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (gva >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx",
+               gva);
+       TEST_ASSERT((gpa % vm->page_size) == 0,
+               "Physical address not on page boundary,\n"
+               "  paddr: 0x%lx vm->page_size: 0x%x",
+               gpa, vm->page_size);
+       TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+               "Physical address beyond maximum supported,\n"
+               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+               gpa, vm->max_gfn, vm->page_size);
+
+       /* Walk through region and segment tables */
+       entry = addr_gpa2hva(vm, vm->pgd);
+       for (ri = 1; ri <= 4; ri++) {
+               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+               if (entry[idx] & REGION_ENTRY_INVALID)
+                       entry[idx] = virt_alloc_region(vm, ri);
+               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+       }
+
+       /* Fill in page table entry */
+       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
+       if (!(entry[idx] & PAGE_INVALID))
+               fprintf(stderr,
+                       "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+       entry[idx] = gpa;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       int ri, idx;
+       uint64_t *entry;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       entry = addr_gpa2hva(vm, vm->pgd);
+       for (ri = 1; ri <= 4; ri++) {
+               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+               TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+                           "No region mapping for vm virtual address 0x%lx",
+                           gva);
+               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+       }
+
+       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
+
+       TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+                   "No page mapping for vm virtual address 0x%lx", gva);
+
+       return (entry[idx] & ~0xffful) + (gva & 0xffful);
+}
+
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+                          uint64_t ptea_start)
+{
+       uint64_t *pte, ptea;
+
+       for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
+               pte = addr_gpa2hva(vm, ptea);
+               if (*pte & PAGE_INVALID)
+                       continue;
+               fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+                       indent, "", ptea, *pte);
+       }
+}
+
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+                            uint64_t reg_tab_addr)
+{
+       uint64_t addr, *entry;
+
+       for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
+               entry = addr_gpa2hva(vm, addr);
+               if (*entry & REGION_ENTRY_INVALID)
+                       continue;
+               fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+                       indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+                       addr, *entry);
+               if (*entry & REGION_ENTRY_TYPE) {
+                       virt_dump_region(stream, vm, indent + 2,
+                                        *entry & REGION_ENTRY_ORIGIN);
+               } else {
+                       virt_dump_ptes(stream, vm, indent + 2,
+                                      *entry & REGION_ENTRY_ORIGIN);
+               }
+       }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       if (!vm->pgd_created)
+               return;
+
+       virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
+       uint64_t stack_vaddr;
+       struct kvm_regs regs;
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+
+       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
+                   vm->page_size);
+
+       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                      DEFAULT_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       /* Setup guest registers */
+       vcpu_regs_get(vcpu, &regs);
+       regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
+       vcpu_regs_set(vcpu, &regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.crs[0] |= 0x00040000;             /* Enable floating point regs */
+       sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
+       vcpu_sregs_set(vcpu, &sregs);
+
+       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
+
+       return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       struct kvm_regs regs;
+       int i;
+
+       TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+                   "  num: %u",
+                   num);
+
+       va_start(ap, num);
+       vcpu_regs_get(vcpu, &regs);
+
+       for (i = 0; i < num; i++)
+               regs.gprs[i + 2] = va_arg(ap, uint64_t);
+
+       vcpu_regs_set(vcpu, &regs);
+       va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+               indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+}
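To make the table walk in virt_arch_pg_map()/addr_arch_gva2gpa() concrete, a worked example under the file's assumptions (4KiB pages, 11 index bits per region/segment level, 8 page-index bits) for an arbitrary gva of 0x0000008000402000:

        ri=1 (region first) : (gva >> 53) & 0x7ff = 0x000
        ri=2 (region second): (gva >> 42) & 0x7ff = 0x000
        ri=3 (region third) : (gva >> 31) & 0x7ff = 0x100
        ri=4 (segment)      : (gva >> 20) & 0x7ff = 0x004
        page index          : (gva >> 12) & 0x0ff = 0x002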
diff --git a/tools/testing/selftests/kvm/lib/s390/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c
new file mode 100644 (file)
index 0000000..cca9873
--- /dev/null
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
+           run->s390_sieic.icptcode == 4 &&
+           (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
+           (run->s390_sieic.ipb >> 16) == 0x501) {
+               int reg = run->s390_sieic.ipa & 0xf;
+
+               return (void *)run->s.regs.gprs[reg];
+       }
+       return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
deleted file mode 100644 (file)
index 2c432fa..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test handler for the s390x DIAGNOSE 0x0318 instruction.
- *
- * Copyright (C) 2020, IBM
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-
-#define ICPT_INSTRUCTION       0x04
-#define IPA0_DIAG              0x8300
-
-static void guest_code(void)
-{
-       uint64_t diag318_info = 0x12345678;
-
-       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
-}
-
-/*
- * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
- * we create an ad-hoc VM here to handle the instruction then extract the
- * necessary data. It is up to the caller to decide what to do with that data.
- */
-static uint64_t diag318_handler(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       uint64_t reg;
-       uint64_t diag318_info;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_run(vcpu);
-       run = vcpu->run;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
-                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
-       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
-                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
-
-       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
-       diag318_info = run->s.regs.gprs[reg];
-
-       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
-
-       kvm_vm_free(vm);
-
-       return diag318_info;
-}
-
-uint64_t get_diag318_info(void)
-{
-       static uint64_t diag318_info;
-       static bool printed_skip;
-
-       /*
-        * If KVM does not support diag318, then return 0 to
-        * ensure tests do not break.
-        */
-       if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
-               if (!printed_skip) {
-                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
-                               "Skipping diag318 test.\n");
-                       printed_skip = true;
-               }
-               return 0;
-       }
-
-       /*
-        * If a test has previously requested the diag318 info,
-        * then don't bother spinning up a temporary VM again.
-        */
-       if (!diag318_info)
-               diag318_info = diag318_handler();
-
-       return diag318_info;
-}
diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c
deleted file mode 100644 (file)
index d540812..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * Contains the definition for the global variables to have the test facitlity feature.
- */
-
-#include "facility.h"
-
-uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
-bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
deleted file mode 100644 (file)
index 20cfe97..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM selftest s390x library code - CPU-related functions (page tables...)
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-
-#include "processor.h"
-#include "kvm_util.h"
-
-#define PAGES_PER_REGION 4
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       vm_paddr_t paddr;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       if (vm->pgd_created)
-               return;
-
-       paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
-                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR,
-                                  vm->memslots[MEM_REGION_PT]);
-       memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
-       vm->pgd = paddr;
-       vm->pgd_created = true;
-}
-
-/*
- * Allocate 4 pages for a region/segment table (ri < 4), or one page for
- * a page table (ri == 4). Returns a suitable region/segment table entry
- * which points to the freshly allocated pages.
- */
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
-{
-       uint64_t taddr;
-
-       taddr = vm_phy_pages_alloc(vm,  ri < 4 ? PAGES_PER_REGION : 1,
-                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-       memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
-
-       return (taddr & REGION_ENTRY_ORIGIN)
-               | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
-               | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
-{
-       int ri, idx;
-       uint64_t *entry;
-
-       TEST_ASSERT((gva % vm->page_size) == 0,
-               "Virtual address not on page boundary,\n"
-               "  vaddr: 0x%lx vm->page_size: 0x%x",
-               gva, vm->page_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (gva >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx",
-               gva);
-       TEST_ASSERT((gpa % vm->page_size) == 0,
-               "Physical address not on page boundary,\n"
-               "  paddr: 0x%lx vm->page_size: 0x%x",
-               gva, vm->page_size);
-       TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
-               "Physical address beyond beyond maximum supported,\n"
-               "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-               gva, vm->max_gfn, vm->page_size);
-
-       /* Walk through region and segment tables */
-       entry = addr_gpa2hva(vm, vm->pgd);
-       for (ri = 1; ri <= 4; ri++) {
-               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
-               if (entry[idx] & REGION_ENTRY_INVALID)
-                       entry[idx] = virt_alloc_region(vm, ri);
-               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
-       }
-
-       /* Fill in page table entry */
-       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
-       if (!(entry[idx] & PAGE_INVALID))
-               fprintf(stderr,
-                       "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
-       entry[idx] = gpa;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       int ri, idx;
-       uint64_t *entry;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       entry = addr_gpa2hva(vm, vm->pgd);
-       for (ri = 1; ri <= 4; ri++) {
-               idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
-               TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
-                           "No region mapping for vm virtual address 0x%lx",
-                           gva);
-               entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
-       }
-
-       idx = (gva >> PAGE_SHIFT) & 0x0ffu;             /* page index */
-
-       TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
-                   "No page mapping for vm virtual address 0x%lx", gva);
-
-       return (entry[idx] & ~0xffful) + (gva & 0xffful);
-}
-
-static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
-                          uint64_t ptea_start)
-{
-       uint64_t *pte, ptea;
-
-       for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
-               pte = addr_gpa2hva(vm, ptea);
-               if (*pte & PAGE_INVALID)
-                       continue;
-               fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
-                       indent, "", ptea, *pte);
-       }
-}
-
-static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
-                            uint64_t reg_tab_addr)
-{
-       uint64_t addr, *entry;
-
-       for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
-               entry = addr_gpa2hva(vm, addr);
-               if (*entry & REGION_ENTRY_INVALID)
-                       continue;
-               fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
-                       indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
-                       addr, *entry);
-               if (*entry & REGION_ENTRY_TYPE) {
-                       virt_dump_region(stream, vm, indent + 2,
-                                        *entry & REGION_ENTRY_ORIGIN);
-               } else {
-                       virt_dump_ptes(stream, vm, indent + 2,
-                                      *entry & REGION_ENTRY_ORIGIN);
-               }
-       }
-}
-
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       if (!vm->pgd_created)
-               return;
-
-       virt_dump_region(stream, vm, indent, vm->pgd);
-}
-
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       vcpu->run->psw_addr = (uintptr_t)guest_code;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
-       uint64_t stack_vaddr;
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-
-       TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
-                   vm->page_size);
-
-       stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
-                                      DEFAULT_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       /* Setup guest registers */
-       vcpu_regs_get(vcpu, &regs);
-       regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
-       vcpu_regs_set(vcpu, &regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs.crs[0] |= 0x00040000;             /* Enable floating point regs */
-       sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
-       vcpu_sregs_set(vcpu, &sregs);
-
-       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
-
-       return vcpu;
-}
-
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       struct kvm_regs regs;
-       int i;
-
-       TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
-                   "  num: %u",
-                   num);
-
-       va_start(ap, num);
-       vcpu_regs_get(vcpu, &regs);
-
-       for (i = 0; i < num; i++)
-               regs.gprs[i + 2] = va_arg(ap, uint64_t);
-
-       vcpu_regs_set(vcpu, &regs);
-       va_end(ap);
-}
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
-               indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
deleted file mode 100644 (file)
index cca9873..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2019 Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
-           run->s390_sieic.icptcode == 4 &&
-           (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
-           (run->s390_sieic.ipb >> 16) == 0x501) {
-               int reg = run->s390_sieic.ipa & 0xf;
-
-               return (void *)run->s.regs.gprs[reg];
-       }
-       return NULL;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
new file mode 100644 (file)
index 0000000..89153a3
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+       wrmsr(MSR_IA32_APICBASE,
+             rdmsr(MSR_IA32_APICBASE) &
+               ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+       uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+       /* Per the SDM: to enable xAPIC while in x2APIC mode, the APIC must first be disabled. */
+       if (val & MSR_IA32_APICBASE_EXTD) {
+               apic_disable();
+               wrmsr(MSR_IA32_APICBASE,
+                     rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+       } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+               wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+       }
+
+       /*
+        * Per SDM: reset value of spurious interrupt vector register has the
+        * APIC software enabled bit=0. It must be enabled in addition to the
+        * enable bit in the MSR.
+        */
+       val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+       xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+       x2apic_write_reg(APIC_SPIV,
+                        x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
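xapic_enable() and x2apic_enable() are meant to be called from guest code before any other APIC access. A minimal sketch of a guest routine using them (guest_code and the GUEST_* reporting follow the usual selftests conventions; the routine itself is purely illustrative):

    static void guest_code(void)
    {
            xapic_enable();

            /* In xAPIC mode the APIC ID lives in bits 31:24 of APIC_ID. */
            GUEST_SYNC(xapic_read_reg(APIC_ID) >> 24);
            GUEST_DONE();
    }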
diff --git a/tools/testing/selftests/kvm/lib/x86/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S
new file mode 100644 (file)
index 0000000..7629819
--- /dev/null
@@ -0,0 +1,81 @@
+handle_exception:
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+       push %r11
+       push %r10
+       push %r9
+       push %r8
+
+       push %rdi
+       push %rsi
+       push %rbp
+       push %rbx
+       push %rdx
+       push %rcx
+       push %rax
+       mov %rsp, %rdi
+
+       call route_exception
+
+       pop %rax
+       pop %rcx
+       pop %rdx
+       pop %rbx
+       pop %rbp
+       pop %rsi
+       pop %rdi
+       pop %r8
+       pop %r9
+       pop %r10
+       pop %r11
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+
+       /* Discard vector and error code. */
+       add $16, %rsp
+       iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+       vector = \from
+       .rept \to - \from + 1
+       .align 8
+
+       /* Fetch current address and append it to idt_handlers. */
+666 :
+.pushsection .rodata
+       .quad 666b
+.popsection
+
+       .if ! \has_error
+       pushq $0
+       .endif
+       pushq $vector
+       jmp handle_exception
+       vector = vector + 1
+       .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+       HANDLERS has_error=0 from=0  to=7
+       HANDLERS has_error=1 from=8  to=8
+       HANDLERS has_error=0 from=9  to=9
+       HANDLERS has_error=1 from=10 to=14
+       HANDLERS has_error=0 from=15 to=16
+       HANDLERS has_error=1 from=17 to=17
+       HANDLERS has_error=0 from=18 to=255
+
+.section        .note.GNU-stack, "", %progbits
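Each HANDLERS stub pushes its vector (plus a dummy error code for vectors that lack one) so handle_exception() always sees the same frame layout before handing the saved registers to route_exception(). Tests attach C handlers to individual vectors via vm_install_exception_handler() from processor.c below; an illustrative sketch with a hypothetical handler:

    static void guest_ud_handler(struct ex_regs *regs)
    {
            /* UD2 is two bytes; skip it and let the guest continue. */
            regs->rip += 2;
    }

    /* ... after creating the VM and vCPU ... */
    vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);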
diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
new file mode 100644 (file)
index 0000000..15bc8cd
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+       static struct kvm_cpuid2 *cpuid;
+       int kvm_fd;
+
+       if (cpuid)
+               return cpuid;
+
+       cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+       close(kvm_fd);
+       return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       static struct kvm_cpuid2 *cpuid_full;
+       const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+       int i, nent = 0;
+
+       if (!cpuid_full) {
+               cpuid_sys = kvm_get_supported_cpuid();
+               cpuid_hv = kvm_get_supported_hv_cpuid();
+
+               cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+               if (!cpuid_full) {
+                       perror("malloc");
+                       abort();
+               }
+
+               /* Need to skip KVM CPUID leaves 0x400000xx */
+               for (i = 0; i < cpuid_sys->nent; i++) {
+                       if (cpuid_sys->entries[i].function >= 0x40000000 &&
+                           cpuid_sys->entries[i].function < 0x40000100)
+                               continue;
+                       cpuid_full->entries[nent] = cpuid_sys->entries[i];
+                       nent++;
+               }
+
+               memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+                      cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+               cpuid_full->nent = nent + cpuid_hv->nent;
+       }
+
+       vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+       vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+       return cpuid;
+}
+
+bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
+               return false;
+
+       return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
+}
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+                                                      vm_vaddr_t *p_hv_pages_gva)
+{
+       vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+       struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+       /* Setup of a region of guest memory for the VP Assist page. */
+       hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+       hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+       hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+       /* Setup of a region of guest memory for the partition assist page. */
+       hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+       hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+       hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+       /* Setup of a region of guest memory for the enlightened VMCS. */
+       hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+       hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+       hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+       *p_hv_pages_gva = hv_pages_gva;
+       return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+       uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+               HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+       wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+       current_vp_assist = vp_assist;
+
+       return 0;
+}
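vcpu_set_hv_cpuid() drops KVM's own paravirt leaves (0x40000000-0x400000FF) before appending the Hyper-V leaves, as Hyper-V enlightenments claim that same CPUID range. A minimal usage sketch (guest_code is a placeholder):

    struct kvm_vcpu *vcpu;
    struct kvm_vm *vm;

    vm = vm_create_with_one_vcpu(&vcpu, guest_code);

    /* Expose the merged KVM + Hyper-V CPUID to the vCPU before it runs. */
    vcpu_set_hv_cpuid(vcpu);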
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
new file mode 100644 (file)
index 0000000..7f5d62a
--- /dev/null
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+       memstress_guest_code(vcpu_id);
+       vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+"      mov (%rsp), %rdi;"
+"      call memstress_l2_guest_code;"
+"      ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       unsigned long *rsp;
+
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+       GUEST_ASSERT(ept_1g_pages_supported());
+
+       rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+       *rsp = vcpu_id;
+       prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+       /*
+        * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+        * pages and 4-level paging, plus a few pages per-vCPU for data
+        * structures such as the VMCS.
+        */
+       return 513 + 10 * nr_vcpus;
+}
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+       uint64_t start, end;
+
+       prepare_eptp(vmx, vm, 0);
+
+       /*
+        * Identity map the first 4G and the test region with 1G pages so that
+        * KVM can shadow the EPT12 with the maximum huge page size supported
+        * by the backing source.
+        */
+       nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+       start = align_down(memstress_args.gpa, PG_SIZE_1G);
+       end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+       nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+       struct vmx_pages *vmx, *vmx0 = NULL;
+       struct kvm_regs regs;
+       vm_vaddr_t vmx_gva;
+       int vcpu_id;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_cpu_has_ept());
+
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+               vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+               if (vcpu_id == 0) {
+                       memstress_setup_ept(vmx, vm);
+                       vmx0 = vmx;
+               } else {
+                       /* Share the same EPT table across all vCPUs. */
+                       vmx->eptp = vmx0->eptp;
+                       vmx->eptp_hva = vmx0->eptp_hva;
+                       vmx->eptp_gpa = vmx0->eptp_gpa;
+               }
+
+               /*
+                * Override the vCPU to run memstress_l1_guest_code() which will
+                * bounce it into L2 before calling memstress_guest_code().
+                */
+               vcpu_regs_get(vcpus[vcpu_id], &regs);
+               regs.rip = (unsigned long) memstress_l1_guest_code;
+               vcpu_regs_set(vcpus[vcpu_id], &regs);
+               vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+       }
+}
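The 513 figure in memstress_nested_pages() follows directly from the paging geometry: one PML4 covers 256 TiB, and each of its 512 entries points to a PDPT whose 512 entries are 1 GiB leaves, so no PD or PT pages are needed. A compile-time restatement of that arithmetic (illustrative only, not part of the patch):

    #define SZ_1G   (1ULL << 30)
    #define SZ_512G (512 * SZ_1G)   /* coverage of one PDPT of 1 GiB leaves */
    #define SZ_256T (1ULL << 48)    /* coverage of one PML4 */

    _Static_assert(SZ_256T / SZ_512G + 1 == 513, "1 PML4 + 512 PDPTs");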
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
new file mode 100644 (file)
index 0000000..f31f042
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+       INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+       AMD_ZEN_CORE_CYCLES,
+       AMD_ZEN_INSTRUCTIONS_RETIRED,
+       AMD_ZEN_BRANCHES_RETIRED,
+       AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
new file mode 100644 (file)
index 0000000..bd5a802
--- /dev/null
@@ -0,0 +1,1293 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include "linux/bitmap.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define KERNEL_CS      0x8
+#define KERNEL_DS      0x10
+#define KERNEL_TSS     0x18
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
+{
+       fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+               "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+               indent, "",
+               regs->rax, regs->rbx, regs->rcx, regs->rdx);
+       fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+               "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+               indent, "",
+               regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+       fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
+               "r10: 0x%.16llx r11: 0x%.16llx\n",
+               indent, "",
+               regs->r8, regs->r9, regs->r10, regs->r11);
+       fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+               "r14: 0x%.16llx r15: 0x%.16llx\n",
+               indent, "",
+               regs->r12, regs->r13, regs->r14, regs->r15);
+       fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+               indent, "",
+               regs->rip, regs->rflags);
+}
+
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+                        uint8_t indent)
+{
+       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+               "selector: 0x%.4x type: 0x%.2x\n",
+               indent, "", segment->base, segment->limit,
+               segment->selector, segment->type);
+       fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+               "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+               indent, "", segment->present, segment->dpl,
+               segment->db, segment->s, segment->l);
+       fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+               "unusable: 0x%.2x padding: 0x%.2x\n",
+               indent, "", segment->g, segment->avl,
+               segment->unusable, segment->padding);
+}
+
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+                       uint8_t indent)
+{
+       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+               "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+               indent, "", dtable->base, dtable->limit,
+               dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
+{
+       unsigned int i;
+
+       fprintf(stream, "%*scs:\n", indent, "");
+       segment_dump(stream, &sregs->cs, indent + 2);
+       fprintf(stream, "%*sds:\n", indent, "");
+       segment_dump(stream, &sregs->ds, indent + 2);
+       fprintf(stream, "%*ses:\n", indent, "");
+       segment_dump(stream, &sregs->es, indent + 2);
+       fprintf(stream, "%*sfs:\n", indent, "");
+       segment_dump(stream, &sregs->fs, indent + 2);
+       fprintf(stream, "%*sgs:\n", indent, "");
+       segment_dump(stream, &sregs->gs, indent + 2);
+       fprintf(stream, "%*sss:\n", indent, "");
+       segment_dump(stream, &sregs->ss, indent + 2);
+       fprintf(stream, "%*str:\n", indent, "");
+       segment_dump(stream, &sregs->tr, indent + 2);
+       fprintf(stream, "%*sldt:\n", indent, "");
+       segment_dump(stream, &sregs->ldt, indent + 2);
+
+       fprintf(stream, "%*sgdt:\n", indent, "");
+       dtable_dump(stream, &sregs->gdt, indent + 2);
+       fprintf(stream, "%*sidt:\n", indent, "");
+       dtable_dump(stream, &sregs->idt, indent + 2);
+
+       fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+               "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+               indent, "",
+               sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+       fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+               "apic_base: 0x%.16llx\n",
+               indent, "",
+               sregs->cr8, sregs->efer, sregs->apic_base);
+
+       fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+               fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+                       sregs->interrupt_bitmap[i]);
+       }
+}
+
+bool kvm_is_tdp_enabled(void)
+{
+       if (host_cpu_is_intel)
+               return get_kvm_intel_param_bool("ept");
+       else
+               return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       /* If needed, create page map l4 table. */
+       if (!vm->pgd_created) {
+               vm->pgd = vm_alloc_page_table(vm);
+               vm->pgd_created = true;
+       }
+}
+
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+                         uint64_t vaddr, int level)
+{
+       uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+       uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+       int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+       TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+                   "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+                   level + 1, vaddr);
+
+       return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+                                      uint64_t *parent_pte,
+                                      uint64_t vaddr,
+                                      uint64_t paddr,
+                                      int current_level,
+                                      int target_level)
+{
+       uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+       paddr = vm_untag_gpa(vm, paddr);
+
+       if (!(*pte & PTE_PRESENT_MASK)) {
+               *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+               if (current_level == target_level)
+                       *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+               else
+                       *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+       } else {
+               /*
+                * Entry already present.  Assert that the caller doesn't want
+                * a hugepage at this level, and that there isn't a hugepage at
+                * this level.
+                */
+               TEST_ASSERT(current_level != target_level,
+                           "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+                           current_level, vaddr);
+               TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+                           "Cannot create page table at level: %u, vaddr: 0x%lx",
+                           current_level, vaddr);
+       }
+       return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+       const uint64_t pg_size = PG_LEVEL_SIZE(level);
+       uint64_t *pml4e, *pdpe, *pde;
+       uint64_t *pte;
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+                   "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       TEST_ASSERT((vaddr % pg_size) == 0,
+                   "Virtual address not aligned,\n"
+                   "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+                   "Invalid virtual address, vaddr: 0x%lx", vaddr);
+       TEST_ASSERT((paddr % pg_size) == 0,
+                   "Physical address not aligned,\n"
+                   "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond maximum supported,\n"
+                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+                   "Unexpected bits in paddr: %lx", paddr);
+
+       /*
+        * Allocate upper level page tables, if not already present.  Return
+        * early if a hugepage was created.
+        */
+       pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+       if (*pml4e & PTE_LARGE_MASK)
+               return;
+
+       pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
+       if (*pdpe & PTE_LARGE_MASK)
+               return;
+
+       pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
+       if (*pde & PTE_LARGE_MASK)
+               return;
+
+       /* Fill in page table entry. */
+       pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+       TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+                   "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+       *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+       /*
+        * Neither SEV nor TDX supports shared page tables, so only the final
+        * leaf PTE needs to have the C/S-bit set manually.
+        */
+       if (vm_is_gpa_protected(vm, paddr))
+               *pte |= vm->arch.c_bit;
+       else
+               *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+       __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+                   uint64_t nr_bytes, int level)
+{
+       uint64_t pg_size = PG_LEVEL_SIZE(level);
+       uint64_t nr_pages = nr_bytes / pg_size;
+       int i;
+
+       TEST_ASSERT(nr_bytes % pg_size == 0,
+                   "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+                   nr_bytes, pg_size);
+
+       for (i = 0; i < nr_pages; i++) {
+               __virt_pg_map(vm, vaddr, paddr, level);
+
+               vaddr += pg_size;
+               paddr += pg_size;
+       }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+       if (*pte & PTE_LARGE_MASK) {
+               TEST_ASSERT(*level == PG_LEVEL_NONE ||
+                           *level == current_level,
+                           "Unexpected hugepage at level %d", current_level);
+               *level = current_level;
+       }
+
+       return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+                                   int *level)
+{
+       uint64_t *pml4e, *pdpe, *pde;
+
+       TEST_ASSERT(!vm->arch.is_pt_protected,
+                   "Walking page tables of protected guests is impossible");
+
+       TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+                   "Invalid PG_LEVEL_* '%d'", *level);
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+               (vaddr >> vm->page_shift)),
+               "Invalid virtual address, vaddr: 0x%lx",
+               vaddr);
+       /*
+        * Based on the mode check above there are 48 bits in the vaddr, so
+        * shift 16 to sign extend the last bit (bit 47).
+        */
+       TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+               "Canonical check failed.  The virtual address is invalid.");
+
+       pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+       if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+               return pml4e;
+
+       pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+       if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+               return pdpe;
+
+       pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+       if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+               return pde;
+
+       return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+}
+
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+{
+       int level = PG_LEVEL_4K;
+
+       return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+       uint64_t *pml4e, *pml4e_start;
+       uint64_t *pdpe, *pdpe_start;
+       uint64_t *pde, *pde_start;
+       uint64_t *pte, *pte_start;
+
+       if (!vm->pgd_created)
+               return;
+
+       fprintf(stream, "%*s                                          "
+               "                no\n", indent, "");
+       fprintf(stream, "%*s      index hvaddr         gpaddr         "
+               "addr         w exec dirty\n",
+               indent, "");
+       pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+       for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+               pml4e = &pml4e_start[n1];
+               if (!(*pml4e & PTE_PRESENT_MASK))
+                       continue;
+               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+                       " %u\n",
+                       indent, "",
+                       pml4e - pml4e_start, pml4e,
+                       addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+                       !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+
+               pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+               for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+                       pdpe = &pdpe_start[n2];
+                       if (!(*pdpe & PTE_PRESENT_MASK))
+                               continue;
+                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
+                               "%u  %u\n",
+                               indent, "",
+                               pdpe - pdpe_start, pdpe,
+                               addr_hva2gpa(vm, pdpe),
+                               PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+                               !!(*pdpe & PTE_NX_MASK));
+
+                       pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+                       for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+                               pde = &pde_start[n3];
+                               if (!(*pde & PTE_PRESENT_MASK))
+                                       continue;
+                               fprintf(stream, "%*spde   0x%-3zx %p "
+                                       "0x%-12lx 0x%-10llx %u  %u\n",
+                                       indent, "", pde - pde_start, pde,
+                                       addr_hva2gpa(vm, pde),
+                                       PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+                                       !!(*pde & PTE_NX_MASK));
+
+                               pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+                               for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+                                       pte = &pte_start[n4];
+                                       if (!(*pte & PTE_PRESENT_MASK))
+                                               continue;
+                                       fprintf(stream, "%*spte   0x%-3zx %p "
+                                               "0x%-12lx 0x%-10llx %u  %u "
+                                               "    %u    0x%-10lx\n",
+                                               indent, "",
+                                               pte - pte_start, pte,
+                                               addr_hva2gpa(vm, pte),
+                                               PTE_GET_PFN(*pte),
+                                               !!(*pte & PTE_WRITABLE_MASK),
+                                               !!(*pte & PTE_NX_MASK),
+                                               !!(*pte & PTE_DIRTY_MASK),
+                                               ((uint64_t) n1 << 27)
+                                                       | ((uint64_t) n2 << 18)
+                                                       | ((uint64_t) n3 << 9)
+                                                       | ((uint64_t) n4));
+                               }
+                       }
+               }
+       }
+}
+
+/*
+ * Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by @segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+       void *gdt = addr_gva2hva(vm, vm->arch.gdt);
+       struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+       desc->limit0 = segp->limit & 0xFFFF;
+       desc->base0 = segp->base & 0xFFFF;
+       desc->base1 = segp->base >> 16;
+       desc->type = segp->type;
+       desc->s = segp->s;
+       desc->dpl = segp->dpl;
+       desc->p = segp->present;
+       desc->limit1 = segp->limit >> 16;
+       desc->avl = segp->avl;
+       desc->l = segp->l;
+       desc->db = segp->db;
+       desc->g = segp->g;
+       desc->base2 = segp->base >> 24;
+       if (!segp->s)
+               desc->base3 = segp->base >> 32;
+}
+
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->selector = KERNEL_CS;
+       segp->limit = 0xFFFFFFFFu;
+       segp->s = 0x1; /* kTypeCodeData */
+       segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+                                         * | kFlagCodeReadable
+                                         */
+       segp->g = true;
+       segp->l = true;
+       segp->present = 1;
+}
+
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->selector = KERNEL_DS;
+       segp->limit = 0xFFFFFFFFu;
+       segp->s = 0x1; /* kTypeCodeData */
+       segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+                                         * | kFlagDataWritable
+                                         */
+       segp->g = true;
+       segp->present = true;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+       int level = PG_LEVEL_NONE;
+       uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+
+       TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+                   "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
+
+       /*
+        * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+        * address bits to be zero.
+        */
+       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
+}
+
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
+{
+       memset(segp, 0, sizeof(*segp));
+       segp->base = base;
+       segp->limit = 0x67;
+       segp->selector = KERNEL_TSS;
+       segp->type = 0xb;
+       segp->present = 1;
+}
+
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct kvm_sregs sregs;
+
+       TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+
+       /* Set mode specific system register values. */
+       vcpu_sregs_get(vcpu, &sregs);
+
+       sregs.idt.base = vm->arch.idt;
+       sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+       sregs.gdt.base = vm->arch.gdt;
+       sregs.gdt.limit = getpagesize() - 1;
+
+       sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+       sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+       if (kvm_cpu_has(X86_FEATURE_XSAVE))
+               sregs.cr4 |= X86_CR4_OSXSAVE;
+       sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+       kvm_seg_set_unusable(&sregs.ldt);
+       kvm_seg_set_kernel_code_64bit(&sregs.cs);
+       kvm_seg_set_kernel_data_64bit(&sregs.ds);
+       kvm_seg_set_kernel_data_64bit(&sregs.es);
+       kvm_seg_set_kernel_data_64bit(&sregs.gs);
+       kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+       sregs.cr3 = vm->pgd;
+       vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       struct kvm_xcrs xcrs = {
+               .nr_xcrs = 1,
+               .xcrs[0].xcr = 0,
+               .xcrs[0].value = kvm_cpu_supported_xcr0(),
+       };
+
+       if (!kvm_cpu_has(X86_FEATURE_XSAVE))
+               return;
+
+       vcpu_xcrs_set(vcpu, &xcrs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+                         int dpl, unsigned short selector)
+{
+       struct idt_entry *base =
+               (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+       struct idt_entry *e = &base[vector];
+
+       memset(e, 0, sizeof(*e));
+       e->offset0 = addr;
+       e->selector = selector;
+       e->ist = 0;
+       e->type = 14;
+       e->dpl = dpl;
+       e->p = 1;
+       e->offset1 = addr >> 16;
+       e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+       if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+               return false;
+
+       if (regs->vector == DE_VECTOR)
+               return false;
+
+       regs->rip = regs->r11;
+       regs->r9 = regs->vector;
+       regs->r10 = regs->error_code;
+       return true;
+}
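kvm_fixup_exception() is the handler side of the "safe" instruction wrappers in processor.h: those wrappers load KVM_EXCEPTION_MAGIC into r9, the address of the guarded instruction into r10 and the fixup target into r11, so a fault resumes at the fixup point with the vector and error code reported back in r9/r10 instead of failing the test. An illustrative guest-side sketch (the MSR index is deliberately bogus):

    uint64_t val;
    uint8_t vector = rdmsr_safe(0xdeadbeef /* hypothetical MSR */, &val);

    GUEST_ASSERT(vector == GP_VECTOR);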
+
+void route_exception(struct ex_regs *regs)
+{
+       typedef void(*handler)(struct ex_regs *);
+       handler *handlers = (handler *)exception_handlers;
+
+       if (handlers && handlers[regs->vector]) {
+               handlers[regs->vector](regs);
+               return;
+       }
+
+       if (kvm_fixup_exception(regs))
+               return;
+
+       GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+                  regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+       extern void *idt_handlers;
+       struct kvm_segment seg;
+       int i;
+
+       vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+       vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+       /* Handlers have the same address in both address spaces. */
+       for (i = 0; i < NUM_INTERRUPTS; i++)
+               set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+       kvm_seg_set_kernel_code_64bit(&seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+
+       kvm_seg_set_kernel_data_64bit(&seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+
+       kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+       kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+                              void (*handler)(struct ex_regs *))
+{
+       vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+       handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+               REPORT_GUEST_ASSERT(uc);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+       int r;
+
+       TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+                   "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
+       vm_create_irqchip(vm);
+       vm_init_descriptor_tables(vm);
+
+       sync_global_to_guest(vm, host_cpu_is_intel);
+       sync_global_to_guest(vm, host_cpu_is_amd);
+       sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+               struct kvm_sev_init init = { 0 };
+
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+
+       r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+       TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+       guest_tsc_khz = r;
+       sync_global_to_guest(vm, guest_tsc_khz);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip = (unsigned long) guest_code;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       struct kvm_mp_state mp_state;
+       struct kvm_regs regs;
+       vm_vaddr_t stack_vaddr;
+       struct kvm_vcpu *vcpu;
+
+       stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+                                      DEFAULT_GUEST_STACK_VADDR_MIN,
+                                      MEM_REGION_DATA);
+
+       stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+       /*
+        * Align stack to match calling sequence requirements in section "The
+        * Stack Frame" of the System V ABI AMD64 Architecture Processor
+        * Supplement, which requires the value (%rsp + 8) to be a multiple of
+        * 16 when control is transferred to the function entry point.
+        *
+        * If this code is ever used to launch a vCPU with a 32-bit entry
+        * point, it may need to subtract 4 bytes instead of 8 bytes.
+        */
+       TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+                   "__vm_vaddr_alloc() did not provide a page-aligned address");
+       stack_vaddr -= 8;
+
+       vcpu = __vm_vcpu_add(vm, vcpu_id);
+       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+       vcpu_init_sregs(vm, vcpu);
+       vcpu_init_xcrs(vm, vcpu);
+
+       /* Setup guest general purpose registers */
+       vcpu_regs_get(vcpu, &regs);
+       regs.rflags = regs.rflags | 0x2;
+       regs.rsp = stack_vaddr;
+       vcpu_regs_set(vcpu, &regs);
+
+       /* Setup the MP state */
+       mp_state.mp_state = 0;
+       vcpu_mp_state_set(vcpu, &mp_state);
+
+       /*
+        * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
+        * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
+        * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
+        * is consistent with vCPU state.
+        */
+       vcpu_get_cpuid(vcpu);
+       return vcpu;
+}
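The stack math above holds because __vm_vaddr_alloc() returns a page-aligned region: the computed top of stack is a multiple of 4096 (and therefore of 16), so subtracting 8 leaves %rsp congruent to 8 mod 16, i.e. the post-CALL state the ABI expects at a function entry point. Restated as a tiny illustrative check (names are placeholders):

    uint64_t top = stack_base + DEFAULT_STACK_PGS * getpagesize(); /* page-aligned */
    uint64_t rsp = top - 8;

    assert((rsp + 8) % 16 == 0);    /* SysV ABI: 16-byte aligned at entry */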
+
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+
+       return vcpu;
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->cpuid)
+               free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+       int kvm_fd;
+
+       if (kvm_supported_cpuid)
+               return kvm_supported_cpuid;
+
+       kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+                 (struct kvm_cpuid2 *)kvm_supported_cpuid);
+
+       close(kvm_fd);
+       return kvm_supported_cpuid;
+}
+
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+                             uint32_t function, uint32_t index,
+                             uint8_t reg, uint8_t lo, uint8_t hi)
+{
+       const struct kvm_cpuid_entry2 *entry;
+       int i;
+
+       for (i = 0; i < cpuid->nent; i++) {
+               entry = &cpuid->entries[i];
+
+               /*
+                * The output registers in kvm_cpuid_entry2 are in alphabetical
+                * order, but kvm_x86_cpu_feature matches that mess, so yay
+                * pointer shenanigans!
+                */
+               if (entry->function == function && entry->index == index)
+                       return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
+       }
+
+       return 0;
+}
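__kvm_cpu_has() relies on struct kvm_cpuid_entry2 declaring eax, ebx, ecx and edx as consecutive __u32 fields, so (&entry->eax)[reg] indexes them like a four-element array. An equivalent, more explicit helper (hypothetical, shown only to spell out the trick):

    static uint32_t cpuid_output_reg(const struct kvm_cpuid_entry2 *entry, uint8_t reg)
    {
            /* 0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX, matching the struct layout. */
            return (&entry->eax)[reg];
    }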
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+                  struct kvm_x86_cpu_feature feature)
+{
+       return __kvm_cpu_has(cpuid, feature.function, feature.index,
+                            feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+                           struct kvm_x86_cpu_property property)
+{
+       return __kvm_cpu_has(cpuid, property.function, property.index,
+                            property.reg, property.lo_bit, property.hi_bit);
+}
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+       int r, kvm_fd;
+
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+       TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+
+       close(kvm_fd);
+       return buffer.entry.data;
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
+{
+       int kvm_fd;
+       u64 bitmask;
+       long rc;
+       struct kvm_device_attr attr = {
+               .group = 0,
+               .attr = KVM_X86_XCOMP_GUEST_SUPP,
+               .addr = (unsigned long) &bitmask,
+       };
+
+       TEST_ASSERT(!kvm_supported_cpuid,
+                   "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+
+       TEST_ASSERT(is_power_of_2(xfeature),
+                   "Dynamic XFeatures must be enabled one at a time");
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+       close(kvm_fd);
+
+       if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+               __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
+
+       TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+
+       __TEST_REQUIRE(bitmask & xfeature,
+                      "Required XSAVE feature '%s' not supported", name);
+
+       TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+       TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+       TEST_ASSERT(bitmask & xfeature,
+                   "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+                   name, xfeature, bitmask);
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+       TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+       /* Allow overriding the default CPUID. */
+       if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+               free(vcpu->cpuid);
+               vcpu->cpuid = NULL;
+       }
+
+       if (!vcpu->cpuid)
+               vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+       memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+       vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value)
+{
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+       (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+       vcpu_set_cpuid(vcpu);
+
+       /* Sanity check that @value doesn't exceed the bounds in any way. */
+       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+       entry->eax = 0;
+       entry->ebx = 0;
+       entry->ecx = 0;
+       entry->edx = 0;
+       vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+                                    struct kvm_x86_cpu_feature feature,
+                                    bool set)
+{
+       struct kvm_cpuid_entry2 *entry;
+       u32 *reg;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+       reg = (&entry->eax) + feature.reg;
+
+       if (set)
+               *reg |= BIT(feature.bit);
+       else
+               *reg &= ~BIT(feature.bit);
+
+       vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+
+       vcpu_msrs_get(vcpu, &buffer.header);
+
+       return buffer.entry.data;
+}
+
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
+{
+       struct {
+               struct kvm_msrs header;
+               struct kvm_msr_entry entry;
+       } buffer = {};
+
+       memset(&buffer, 0, sizeof(buffer));
+       buffer.header.nmsrs = 1;
+       buffer.entry.index = msr_index;
+       buffer.entry.data = msr_value;
+
+       return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+       va_list ap;
+       struct kvm_regs regs;
+
+       TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+                   "  num: %u",
+                   num);
+
+       va_start(ap, num);
+       vcpu_regs_get(vcpu, &regs);
+
+       if (num >= 1)
+               regs.rdi = va_arg(ap, uint64_t);
+
+       if (num >= 2)
+               regs.rsi = va_arg(ap, uint64_t);
+
+       if (num >= 3)
+               regs.rdx = va_arg(ap, uint64_t);
+
+       if (num >= 4)
+               regs.rcx = va_arg(ap, uint64_t);
+
+       if (num >= 5)
+               regs.r8 = va_arg(ap, uint64_t);
+
+       if (num >= 6)
+               regs.r9 = va_arg(ap, uint64_t);
+
+       vcpu_regs_set(vcpu, &regs);
+       va_end(ap);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+       struct kvm_regs regs;
+       struct kvm_sregs sregs;
+
+       fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
+
+       fprintf(stream, "%*sregs:\n", indent + 2, "");
+       vcpu_regs_get(vcpu, &regs);
+       regs_dump(stream, &regs, indent + 4);
+
+       fprintf(stream, "%*ssregs:\n", indent + 2, "");
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs_dump(stream, &sregs, indent + 4);
+}
+
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
+{
+       struct kvm_msr_list *list;
+       struct kvm_msr_list nmsrs;
+       int kvm_fd, r;
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+
+       nmsrs.nmsrs = 0;
+       if (!feature_msrs)
+               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+       else
+               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+       TEST_ASSERT(r == -1 && errno == E2BIG,
+                   "Expected -E2BIG, got rc: %i errno: %i (%s)",
+                   r, errno, strerror(errno));
+
+       list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+       TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+       list->nmsrs = nmsrs.nmsrs;
+
+       if (!feature_msrs)
+               kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+       else
+               kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+       close(kvm_fd);
+
+       TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+                   "Number of MSRs in list changed, was %d, now %d",
+                   nmsrs.nmsrs, list->nmsrs);
+       return list;
+}
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
+{
+       static const struct kvm_msr_list *list;
+
+       if (!list)
+               list = __kvm_get_msr_index_list(false);
+       return list;
+}
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
+{
+       static const struct kvm_msr_list *list;
+
+       if (!list)
+               list = __kvm_get_msr_index_list(true);
+       return list;
+}
+
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+       const struct kvm_msr_list *list = kvm_get_msr_index_list();
+       int i;
+
+       for (i = 0; i < list->nmsrs; ++i) {
+               if (list->indices[i] == msr_index)
+                       return true;
+       }
+
+       return false;
+}
+
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+                                 struct kvm_x86_state *state)
+{
+       int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+       if (size) {
+               state->xsave = malloc(size);
+               vcpu_xsave2_get(vcpu, state->xsave);
+       } else {
+               state->xsave = malloc(sizeof(struct kvm_xsave));
+               vcpu_xsave_get(vcpu, state->xsave);
+       }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+       const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
+       struct kvm_x86_state *state;
+       int i;
+
+       static int nested_size = -1;
+
+       if (nested_size == -1) {
+               nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+               TEST_ASSERT(nested_size <= sizeof(state->nested_),
+                           "Nested state size too big, %i > %zi",
+                           nested_size, sizeof(state->nested_));
+       }
+
+       /*
+        * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+        * guest state is consistent only after userspace re-enters the
+        * kernel with KVM_RUN.  Complete IO prior to migrating state
+        * to a new VM.
+        */
+       vcpu_run_complete_io(vcpu);
+
+       state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+       TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+       vcpu_events_get(vcpu, &state->events);
+       vcpu_mp_state_get(vcpu, &state->mp_state);
+       vcpu_regs_get(vcpu, &state->regs);
+       vcpu_save_xsave_state(vcpu, state);
+
+       if (kvm_has_cap(KVM_CAP_XCRS))
+               vcpu_xcrs_get(vcpu, &state->xcrs);
+
+       vcpu_sregs_get(vcpu, &state->sregs);
+
+       if (nested_size) {
+               state->nested.size = sizeof(state->nested_);
+
+               vcpu_nested_state_get(vcpu, &state->nested);
+               TEST_ASSERT(state->nested.size <= nested_size,
+                           "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+                           state->nested.size, nested_size);
+       } else {
+               state->nested.size = 0;
+       }
+
+       state->msrs.nmsrs = msr_list->nmsrs;
+       for (i = 0; i < msr_list->nmsrs; i++)
+               state->msrs.entries[i].index = msr_list->indices[i];
+       vcpu_msrs_get(vcpu, &state->msrs);
+
+       vcpu_debugregs_get(vcpu, &state->debugregs);
+
+       return state;
+}
+
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
+{
+       vcpu_sregs_set(vcpu, &state->sregs);
+       vcpu_msrs_set(vcpu, &state->msrs);
+
+       if (kvm_has_cap(KVM_CAP_XCRS))
+               vcpu_xcrs_set(vcpu, &state->xcrs);
+
+       vcpu_xsave_set(vcpu,  state->xsave);
+       vcpu_events_set(vcpu, &state->events);
+       vcpu_mp_state_set(vcpu, &state->mp_state);
+       vcpu_debugregs_set(vcpu, &state->debugregs);
+       vcpu_regs_set(vcpu, &state->regs);
+
+       if (state->nested.size)
+               vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+       free(state->xsave);
+       free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+       if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+               *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+               *va_bits = 32;
+       } else {
+               *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+               *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+       }
+}
+
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+               vm->gpa_tag_mask = vm->arch.c_bit;
+       } else {
+               vm->arch.sev_fd = -1;
+       }
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+                                              uint32_t function, uint32_t index)
+{
+       int i;
+
+       for (i = 0; i < cpuid->nent; i++) {
+               if (cpuid->entries[i].function == function &&
+                   cpuid->entries[i].index == index)
+                       return &cpuid->entries[i];
+       }
+
+       TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+       return NULL;
+}
+
+#define X86_HYPERCALL(inputs...)                                       \
+({                                                                     \
+       uint64_t r;                                                     \
+                                                                       \
+       asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"          \
+                    "jnz 1f\n\t"                                       \
+                    "vmcall\n\t"                                       \
+                    "jmp 2f\n\t"                                       \
+                    "1: vmmcall\n\t"                                   \
+                    "2:"                                               \
+                    : "=a"(r)                                          \
+                    : [use_vmmcall] "r" (host_cpu_is_amd), inputs);    \
+                                                                       \
+       r;                                                              \
+})
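+
+/*
+ * Note: X86_HYPERCALL picks the hypercall instruction at runtime, VMCALL on
+ * Intel hosts and VMMCALL on AMD hosts, so the same guest code runs on either
+ * vendor without needing separate builds.
+ */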
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+                      uint64_t a3)
+{
+       return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+       return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+       GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+       const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+       unsigned long ht_gfn, max_gfn, max_pfn;
+       uint8_t maxphyaddr, guest_maxphyaddr;
+
+       /*
+        * Use "guest MAXPHYADDR" from KVM if it's available.  Guest MAXPHYADDR
+        * enumerates the max _mappable_ GPA, which can be less than the raw
+        * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+        * doesn't support 5-level TDP.
+        */
+       guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+       guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+       TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+                   "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
+
+       max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
+
+       /* Avoid reserved HyperTransport region on AMD processors.  */
+       if (!host_cpu_is_amd)
+               return max_gfn;
+
+       /* On parts with <40 physical address bits, the area is fully hidden */
+       if (vm->pa_bits < 40)
+               return max_gfn;
+
+       /* Before family 17h, the HyperTransport area is just below 1T.  */
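+       /* (1 << 28) GFNs is 1 TiB worth of pages, assuming 4 KiB x86 pages. */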
+       ht_gfn = (1 << 28) - num_ht_pages;
+       if (this_cpu_family() < 0x17)
+               goto done;
+
+       /*
+        * Otherwise it's at the top of the physical address space, possibly
+        * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
+        * the old conservative value if MAXPHYADDR is not enumerated.
+        */
+       if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+               goto done;
+
+       maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+       max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+       if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+               max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+       ht_gfn = max_pfn - num_ht_pages;
+done:
+       return min(max_gfn, ht_gfn - 1);
+}
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+       /* Ensure that a KVM vendor-specific module is loaded. */
+       if (vm == NULL)
+               close(open_kvm_dev_path_or_exit());
+
+       return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+void kvm_selftest_arch_init(void)
+{
+       host_cpu_is_intel = this_cpu_is_intel();
+       host_cpu_is_amd = this_cpu_is_amd();
+       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+       char *clk_name = sys_get_cur_clocksource();
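+       /*
+        * Note: the trailing newlines in the comparisons below are deliberate;
+        * the clocksource name is presumably returned verbatim from sysfs,
+        * '\n' included.
+        */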
+       bool ret = !strcmp(clk_name, "tsc\n") ||
+                  !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+       free(clk_name);
+
+       return ret;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
new file mode 100644 (file)
index 0000000..e9535ee
--- /dev/null
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+       sparsebit_idx_t i, j;
+
+       if (!sparsebit_any_set(protected_phy_pages))
+               return;
+
+       sev_register_encrypted_memory(vm, region);
+
+       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+               const uint64_t size = (j - i + 1) * vm->page_size;
+               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+               sev_launch_update_data(vm, gpa_base + offset, size);
+       }
+}
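+
+/*
+ * SEV(-ES) initialization below supports two flows: the legacy KVM_SEV_INIT /
+ * KVM_SEV_ES_INIT ioctls when the test created a plain KVM_X86_DEFAULT_VM,
+ * and KVM_SEV_INIT2 when a dedicated SEV/SEV-ES VM type is used.
+ */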
+
+void sev_vm_init(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_DEFAULT_VM) {
+               assert(vm->arch.sev_fd == -1);
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+       } else {
+               struct kvm_sev_init init = { 0 };
+               assert(vm->type == KVM_X86_SEV_VM);
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+       if (vm->type == KVM_X86_DEFAULT_VM) {
+               assert(vm->arch.sev_fd == -1);
+               vm->arch.sev_fd = open_sev_dev_path_or_exit();
+               vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+       } else {
+               struct kvm_sev_init init = { 0 };
+               assert(vm->type == KVM_X86_SEV_ES_VM);
+               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+       }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+       struct kvm_sev_launch_start launch_start = {
+               .policy = policy,
+       };
+       struct userspace_mem_region *region;
+       struct kvm_sev_guest_status status;
+       int ctr;
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+       TEST_ASSERT_EQ(status.policy, policy);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+               encrypt_region(vm, region);
+
+       if (policy & SEV_POLICY_ES)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+       struct kvm_sev_launch_measure launch_measure;
+       struct kvm_sev_guest_status guest_status;
+
+       launch_measure.len = 256;
+       launch_measure.uaddr = (__u64)measurement;
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+                   "Unexpected guest state: %d", status.state);
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+                                          struct kvm_vcpu **cpu)
+{
+       struct vm_shape shape = {
+               .mode = VM_MODE_DEFAULT,
+               .type = type,
+       };
+       struct kvm_vm *vm;
+       struct kvm_vcpu *cpus[1];
+
+       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+       *cpu = cpus[0];
+
+       return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+{
+       sev_vm_launch(vm, policy);
+
+       if (!measurement)
+               measurement = alloca(256);
+
+       sev_vm_launch_measure(vm, measurement);
+
+       sev_vm_launch_finish(vm);
+}
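+
+/*
+ * Typical usage (sketch only; guest_code is a placeholder and the policy flag
+ * is assumed to come from sev.h):
+ *
+ *     struct kvm_vcpu *vcpu;
+ *     struct kvm_vm *vm;
+ *
+ *     vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_VM, guest_code, &vcpu);
+ *     vm_sev_launch(vm, SEV_POLICY_NO_DBG, NULL);
+ *     vcpu_run(vcpu);
+ */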
diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
new file mode 100644 (file)
index 0000000..d239c20
--- /dev/null
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers used for nested SVM testing
+ * Largely inspired from KVM unit test svm.c
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+#define SEV_DEV_PATH "/dev/sev"
+
+struct gpr64_regs guest_regs;
+u64 rflags;
+
+/* Allocate memory regions for nested SVM tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_svm_gva - The guest virtual address for the struct svm_test_data.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the SVM areas.
+ */
+struct svm_test_data *
+vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+{
+       vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
+       struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
+
+       svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
+       svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
+       svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
+
+       svm->save_area = (void *)vm_vaddr_alloc_page(vm);
+       svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
+       svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+
+       svm->msr = (void *)vm_vaddr_alloc_page(vm);
+       svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+       svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+       memset(svm->msr_hva, 0, getpagesize());
+
+       *p_svm_gva = svm_gva;
+       return svm;
+}
+
+static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+                        u64 base, u32 limit, u32 attr)
+{
+       seg->selector = selector;
+       seg->attrib = attr;
+       seg->limit = limit;
+       seg->base = base;
+}
+
+void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
+{
+       struct vmcb *vmcb = svm->vmcb;
+       uint64_t vmcb_gpa = svm->vmcb_gpa;
+       struct vmcb_save_area *save = &vmcb->save;
+       struct vmcb_control_area *ctrl = &vmcb->control;
+       u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+             | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+       u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+               | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+       uint64_t efer;
+
+       efer = rdmsr(MSR_EFER);
+       wrmsr(MSR_EFER, efer | EFER_SVME);
+       wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
+
+       memset(vmcb, 0, sizeof(*vmcb));
+       asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
+       vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
+       vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
+       vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
+       vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
+
+       ctrl->asid = 1;
+       save->cpl = 0;
+       save->efer = rdmsr(MSR_EFER);
+       asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
+       asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
+       asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
+       asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
+       asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
+       asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
+       save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+       save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+       ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+                               (1ULL << INTERCEPT_VMMCALL);
+       ctrl->msrpm_base_pa = svm->msr_gpa;
+
+       vmcb->save.rip = (u64)guest_rip;
+       vmcb->save.rsp = (u64)guest_rsp;
+       guest_regs.rdi = (u64)svm;
+}
+
+/*
+ * save/restore 64-bit general registers except rax, rip, rsp
+ * which are handled directly via the VMCB guest processor state
+ */
+#define SAVE_GPR_C                             \
+       "xchg %%rbx, guest_regs+0x20\n\t"       \
+       "xchg %%rcx, guest_regs+0x10\n\t"       \
+       "xchg %%rdx, guest_regs+0x18\n\t"       \
+       "xchg %%rbp, guest_regs+0x30\n\t"       \
+       "xchg %%rsi, guest_regs+0x38\n\t"       \
+       "xchg %%rdi, guest_regs+0x40\n\t"       \
+       "xchg %%r8,  guest_regs+0x48\n\t"       \
+       "xchg %%r9,  guest_regs+0x50\n\t"       \
+       "xchg %%r10, guest_regs+0x58\n\t"       \
+       "xchg %%r11, guest_regs+0x60\n\t"       \
+       "xchg %%r12, guest_regs+0x68\n\t"       \
+       "xchg %%r13, guest_regs+0x70\n\t"       \
+       "xchg %%r14, guest_regs+0x78\n\t"       \
+       "xchg %%r15, guest_regs+0x80\n\t"
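+
+/*
+ * The hardcoded offsets above must match the field layout of
+ * struct gpr64_regs; rax (offset 0) is handled separately in run_guest()
+ * by bouncing it through the VMCB.
+ */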
+
+#define LOAD_GPR_C      SAVE_GPR_C
+
+/*
+ * Selftests do not use interrupts, so clgi/sti/cli/stgi are omitted for now.
+ * The registers touched by LOAD/SAVE_GPR_C end up unmodified (each is xchg'd
+ * twice), so they do not need to be in the clobber list.
+ */
+void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+{
+       asm volatile (
+               "vmload %[vmcb_gpa]\n\t"
+               "mov rflags, %%r15\n\t" // rflags
+               "mov %%r15, 0x170(%[vmcb])\n\t"
+               "mov guest_regs, %%r15\n\t"     // rax
+               "mov %%r15, 0x1f8(%[vmcb])\n\t"
+               LOAD_GPR_C
+               "vmrun %[vmcb_gpa]\n\t"
+               SAVE_GPR_C
+               "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
+               "mov %%r15, rflags\n\t"
+               "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
+               "mov %%r15, guest_regs\n\t"
+               "vmsave %[vmcb_gpa]\n\t"
+               : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
+               : "r15", "memory");
+}
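+
+/*
+ * Typical L1 usage (sketch only; l2_guest_code and l2_stack_top are
+ * placeholders):
+ *
+ *     generic_svm_setup(svm, l2_guest_code, l2_stack_top);
+ *     run_guest(svm->vmcb, svm->vmcb_gpa);
+ *     GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ */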
+
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ *   The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
+{
+       return open_path_or_exit(SEV_DEV_PATH, 0);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
new file mode 100644 (file)
index 0000000..1265cec
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /*
+        * FIXME: Revert this hack (the entire commit that added it) once nVMX
+        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
+        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
+        * in particular is problematic) along with RDX and RDI (which are
+        * inputs), and clobber volatile GPRs. *sigh*
+        */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+       "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+       asm volatile("push %%rbp\n\t"
+                    "push %%r15\n\t"
+                    "push %%r14\n\t"
+                    "push %%r13\n\t"
+                    "push %%r12\n\t"
+                    "push %%rbx\n\t"
+                    "push %%rdx\n\t"
+                    "push %%rdi\n\t"
+                    "in %[port], %%al\n\t"
+                    "pop %%rdi\n\t"
+                    "pop %%rdx\n\t"
+                    "pop %%rbx\n\t"
+                    "pop %%r12\n\t"
+                    "pop %%r13\n\t"
+                    "pop %%r14\n\t"
+                    "pop %%r15\n\t"
+                    "pop %%rbp\n\t"
+               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
+               struct kvm_regs regs;
+
+               vcpu_regs_get(vcpu, &regs);
+               return (void *)regs.rdi;
+       }
+       return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
new file mode 100644 (file)
index 0000000..d4d1208
--- /dev/null
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+
+#include <asm/msr-index.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define PAGE_SHIFT_4K  12
+
+#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+
+bool enable_evmcs;
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+struct eptPageTableEntry {
+       uint64_t readable:1;
+       uint64_t writable:1;
+       uint64_t executable:1;
+       uint64_t memory_type:3;
+       uint64_t ignore_pat:1;
+       uint64_t page_size:1;
+       uint64_t accessed:1;
+       uint64_t dirty:1;
+       uint64_t ignored_11_10:2;
+       uint64_t address:40;
+       uint64_t ignored_62_52:11;
+       uint64_t suppress_ve:1;
+};
+
+struct eptPageTablePointer {
+       uint64_t memory_type:3;
+       uint64_t page_walk_length:3;
+       uint64_t ad_enabled:1;
+       uint64_t reserved_11_07:5;
+       uint64_t address:40;
+       uint64_t reserved_63_52:12;
+};
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
+{
+       uint16_t evmcs_ver;
+
+       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+                       (unsigned long)&evmcs_ver);
+
+       /* KVM should return supported EVMCS version range */
+       TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
+                   (evmcs_ver & 0xff) > 0,
+                   "Incorrect EVMCS version range: %x:%x",
+                   evmcs_ver & 0xff, evmcs_ver >> 8);
+
+       return evmcs_ver;
+}
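+
+/*
+ * Note: KVM packs the supported Enlightened VMCS versions into 16 bits, with
+ * the lowest supported version in bits 7:0 and the highest in bits 15:8,
+ * which is what the assertion above checks.
+ */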
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+       vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
+       struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+       /* Setup of a region of guest memory for the vmxon region. */
+       vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+       vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+       /* Setup of a region of guest memory for a vmcs. */
+       vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+       vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+       /* Setup of a region of guest memory for the MSR bitmap. */
+       vmx->msr = (void *)vm_vaddr_alloc_page(vm);
+       vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+       vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+       memset(vmx->msr_hva, 0, getpagesize());
+
+       /* Setup of a region of guest memory for the shadow VMCS. */
+       vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
+       vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+       vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+       /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+       vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+       vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+       memset(vmx->vmread_hva, 0, getpagesize());
+
+       vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
+       vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+       vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+       memset(vmx->vmwrite_hva, 0, getpagesize());
+
+       *p_vmx_gva = vmx_gva;
+       return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+       uint64_t feature_control;
+       uint64_t required;
+       unsigned long cr0;
+       unsigned long cr4;
+
+       /*
+        * Ensure bits in CR0 and CR4 are valid in VMX operation:
+        * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+        * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+        */
+       __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+       cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+       cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+       __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+       __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+       cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+       cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+       /* Enable VMX operation */
+       cr4 |= X86_CR4_VMXE;
+       __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+       /*
+        * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+        *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
+        *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+        *    outside of SMX causes a #GP.
+        */
+       required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+       required |= FEAT_CTL_LOCKED;
+       feature_control = rdmsr(MSR_IA32_FEAT_CTL);
+       if ((feature_control & required) != required)
+               wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
+
+       /* Enter VMX root operation. */
+       *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+       if (vmxon(vmx->vmxon_gpa))
+               return false;
+
+       return true;
+}
+
+bool load_vmcs(struct vmx_pages *vmx)
+{
+       /* Load a VMCS. */
+       *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+       if (vmclear(vmx->vmcs_gpa))
+               return false;
+
+       if (vmptrld(vmx->vmcs_gpa))
+               return false;
+
+       /* Setup shadow VMCS, do not load it yet. */
+       *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+       if (vmclear(vmx->shadow_vmcs_gpa))
+               return false;
+
+       return true;
+}
+
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+       return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+       return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+       uint32_t sec_exec_ctl = 0;
+
+       vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+       vmwrite(POSTED_INTR_NV, 0);
+
+       vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+
+       if (vmx->eptp_gpa) {
+               uint64_t ept_paddr;
+               struct eptPageTablePointer eptp = {
+                       .memory_type = X86_MEMTYPE_WB,
+                       .page_walk_length = 3, /* 4-level walk; the field holds length - 1 */
+                       .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
+                       .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
+               };
+
+               memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
+               vmwrite(EPT_POINTER, ept_paddr);
+               sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
+       }
+
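+       /*
+        * Write the secondary controls first; if the vmwrite fails, the CPU
+        * presumably doesn't support secondary execution controls, which is
+        * tolerable only if none were requested (i.e. no EPT).
+        */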
+       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
+               vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                       rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+       else {
+               vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+               GUEST_ASSERT(!sec_exec_ctl);
+       }
+
+       vmwrite(EXCEPTION_BITMAP, 0);
+       vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+       vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+       vmwrite(CR3_TARGET_COUNT, 0);
+       vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+               VM_EXIT_HOST_ADDR_SPACE_SIZE);    /* 64-bit host */
+       vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+       vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+       vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+               VM_ENTRY_IA32E_MODE);             /* 64-bit guest */
+       vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+       vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+       vmwrite(TPR_THRESHOLD, 0);
+
+       vmwrite(CR0_GUEST_HOST_MASK, 0);
+       vmwrite(CR4_GUEST_HOST_MASK, 0);
+       vmwrite(CR0_READ_SHADOW, get_cr0());
+       vmwrite(CR4_READ_SHADOW, get_cr4());
+
+       vmwrite(MSR_BITMAP, vmx->msr_gpa);
+       vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+       vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+       uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+       vmwrite(HOST_ES_SELECTOR, get_es());
+       vmwrite(HOST_CS_SELECTOR, get_cs());
+       vmwrite(HOST_SS_SELECTOR, get_ss());
+       vmwrite(HOST_DS_SELECTOR, get_ds());
+       vmwrite(HOST_FS_SELECTOR, get_fs());
+       vmwrite(HOST_GS_SELECTOR, get_gs());
+       vmwrite(HOST_TR_SELECTOR, get_tr());
+
+       if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+               vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+       if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+               vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+       if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+               vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+                       rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+       vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+       vmwrite(HOST_CR0, get_cr0());
+       vmwrite(HOST_CR3, get_cr3());
+       vmwrite(HOST_CR4, get_cr4());
+       vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+       vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+       vmwrite(HOST_TR_BASE,
+               get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
+       vmwrite(HOST_GDTR_BASE, get_gdt().address);
+       vmwrite(HOST_IDTR_BASE, get_idt().address);
+       vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+       vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+       vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+       vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+       vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+       vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+       vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+       vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+       vmwrite(GUEST_LDTR_SELECTOR, 0);
+       vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+       vmwrite(GUEST_INTR_STATUS, 0);
+       vmwrite(GUEST_PML_INDEX, 0);
+
+       vmwrite(VMCS_LINK_POINTER, -1ll);
+       vmwrite(GUEST_IA32_DEBUGCTL, 0);
+       vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+       vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+       vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+               vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+       vmwrite(GUEST_ES_LIMIT, -1);
+       vmwrite(GUEST_CS_LIMIT, -1);
+       vmwrite(GUEST_SS_LIMIT, -1);
+       vmwrite(GUEST_DS_LIMIT, -1);
+       vmwrite(GUEST_FS_LIMIT, -1);
+       vmwrite(GUEST_GS_LIMIT, -1);
+       vmwrite(GUEST_LDTR_LIMIT, -1);
+       vmwrite(GUEST_TR_LIMIT, 0x67);
+       vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+       vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+       vmwrite(GUEST_ES_AR_BYTES,
+               vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+       vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+       vmwrite(GUEST_DS_AR_BYTES,
+               vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_FS_AR_BYTES,
+               vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_GS_AR_BYTES,
+               vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+       vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+       vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+       vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+       vmwrite(GUEST_ACTIVITY_STATE, 0);
+       vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+       vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+       vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+       vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+       vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+       vmwrite(GUEST_ES_BASE, 0);
+       vmwrite(GUEST_CS_BASE, 0);
+       vmwrite(GUEST_SS_BASE, 0);
+       vmwrite(GUEST_DS_BASE, 0);
+       vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+       vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+       vmwrite(GUEST_LDTR_BASE, 0);
+       vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+       vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+       vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+       vmwrite(GUEST_DR7, 0x400);
+       vmwrite(GUEST_RSP, (uint64_t)rsp);
+       vmwrite(GUEST_RIP, (uint64_t)rip);
+       vmwrite(GUEST_RFLAGS, 2);
+       vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+       vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+       vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+       init_vmcs_control_fields(vmx);
+       init_vmcs_host_state();
+       init_vmcs_guest_state(guest_rip, guest_rsp);
+}
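+
+/*
+ * Typical L1 usage (sketch only; l2_guest_code and l2_stack_top are
+ * placeholders), mirroring the nested memstress setup:
+ *
+ *     GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ *     GUEST_ASSERT(load_vmcs(vmx));
+ *     prepare_vmcs(vmx, l2_guest_code, l2_stack_top);
+ *     GUEST_ASSERT(!vmlaunch());
+ */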
+
+static void nested_create_pte(struct kvm_vm *vm,
+                             struct eptPageTableEntry *pte,
+                             uint64_t nested_paddr,
+                             uint64_t paddr,
+                             int current_level,
+                             int target_level)
+{
+       if (!pte->readable) {
+               pte->writable = true;
+               pte->readable = true;
+               pte->executable = true;
+               pte->page_size = (current_level == target_level);
+               if (pte->page_size)
+                       pte->address = paddr >> vm->page_shift;
+               else
+                       pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+       } else {
+               /*
+                * Entry already present.  Assert that the caller doesn't want
+                * a hugepage at this level, and that there isn't a hugepage at
+                * this level.
+                */
+               TEST_ASSERT(current_level != target_level,
+                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+                           current_level, nested_paddr);
+               TEST_ASSERT(!pte->page_size,
+                           "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+                           current_level, nested_paddr);
+       }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                    uint64_t nested_paddr, uint64_t paddr, int target_level)
+{
+       const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+       struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+       uint16_t index;
+
+       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+                   "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+       TEST_ASSERT((nested_paddr >> 48) == 0,
+                   "Nested physical address 0x%lx requires 5-level paging",
+                   nested_paddr);
+       TEST_ASSERT((nested_paddr % page_size) == 0,
+                   "Nested physical address not on page boundary,\n"
+                   "  nested_paddr: 0x%lx page_size: 0x%lx",
+                   nested_paddr, page_size);
+       TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Nested physical address beyond maximum supported,\n"
+                   "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   nested_paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT((paddr % page_size) == 0,
+                   "Physical address not on page boundary,\n"
+                   "  paddr: 0x%lx page_size: 0x%lx",
+                   paddr, page_size);
+       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+                   "Physical address beyond maximum supported,\n"
+                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+                   paddr, vm->max_gfn, vm->page_size);
+
+       for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+               index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+               pte = &pt[index];
+
+               nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
+
+               if (pte->page_size)
+                       break;
+
+               pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+       }
+
+       /*
+        * For now mark these as accessed and dirty because the only
+        * testcase we have needs that.  Can be reconsidered later.
+        */
+       pte->accessed = true;
+       pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                  uint64_t nested_paddr, uint64_t paddr)
+{
+       __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
+}
+
+/*
+ * Map a range of EPT guest physical addresses to the VM's physical address
+ * space.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   nested_paddr - Nested guest physical address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   level - The level at which to map the range
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a nested guest translation for the
+ * page range starting at nested_paddr to the page range starting at paddr.
+ */
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+                 int level)
+{
+       size_t page_size = PG_LEVEL_SIZE(level);
+       size_t npages = size / page_size;
+
+       TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
+       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+       while (npages--) {
+               __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
+               nested_paddr += page_size;
+               paddr += page_size;
+       }
+}
+
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+               uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+       __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
+/*
+ * Prepare an identity extended page table that maps all the physical pages
+ * in the VM.
+ */
+void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
+                       uint32_t memslot)
+{
+       sparsebit_idx_t i, last;
+       struct userspace_mem_region *region =
+               memslot2region(vm, memslot);
+
+       i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+       last = i + (region->region.memory_size >> vm->page_shift);
+       for (;;) {
+               i = sparsebit_next_clear(region->unused_phy_pages, i);
+               if (i > last)
+                       break;
+
+               nested_map(vmx, vm,
+                          (uint64_t)i << vm->page_shift,
+                          (uint64_t)i << vm->page_shift,
+                          1 << vm->page_shift);
+       }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+                           uint64_t addr, uint64_t size)
+{
+       __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+       uint64_t ctrl;
+
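+       /*
+        * Bits 63:32 of the VMX control MSRs enumerate the "allowed-1"
+        * settings, i.e. which controls are permitted to be set.
+        */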
+       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+       if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+               return false;
+
+       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+       return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
+void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
+                 uint32_t eptp_memslot)
+{
+       TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+       vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
+       vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
+       vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
+}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+       vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+       vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+       vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
deleted file mode 100644 (file)
index 89153a3..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- */
-
-#include "apic.h"
-
-void apic_disable(void)
-{
-       wrmsr(MSR_IA32_APICBASE,
-             rdmsr(MSR_IA32_APICBASE) &
-               ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void xapic_enable(void)
-{
-       uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
-       /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
-       if (val & MSR_IA32_APICBASE_EXTD) {
-               apic_disable();
-               wrmsr(MSR_IA32_APICBASE,
-                     rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
-       } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
-               wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
-       }
-
-       /*
-        * Per SDM: reset value of spurious interrupt vector register has the
-        * APIC software enabled bit=0. It must be enabled in addition to the
-        * enable bit in the MSR.
-        */
-       val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
-       xapic_write_reg(APIC_SPIV, val);
-}
-
-void x2apic_enable(void)
-{
-       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
-             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
-       x2apic_write_reg(APIC_SPIV,
-                        x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
deleted file mode 100644 (file)
index 7629819..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-handle_exception:
-       push %r15
-       push %r14
-       push %r13
-       push %r12
-       push %r11
-       push %r10
-       push %r9
-       push %r8
-
-       push %rdi
-       push %rsi
-       push %rbp
-       push %rbx
-       push %rdx
-       push %rcx
-       push %rax
-       mov %rsp, %rdi
-
-       call route_exception
-
-       pop %rax
-       pop %rcx
-       pop %rdx
-       pop %rbx
-       pop %rbp
-       pop %rsi
-       pop %rdi
-       pop %r8
-       pop %r9
-       pop %r10
-       pop %r11
-       pop %r12
-       pop %r13
-       pop %r14
-       pop %r15
-
-       /* Discard vector and error code. */
-       add $16, %rsp
-       iretq
-
-/*
- * Build the handle_exception wrappers which push the vector/error code on the
- * stack and an array of pointers to those wrappers.
- */
-.pushsection .rodata
-.globl idt_handlers
-idt_handlers:
-.popsection
-
-.macro HANDLERS has_error from to
-       vector = \from
-       .rept \to - \from + 1
-       .align 8
-
-       /* Fetch current address and append it to idt_handlers. */
-666 :
-.pushsection .rodata
-       .quad 666b
-.popsection
-
-       .if ! \has_error
-       pushq $0
-       .endif
-       pushq $vector
-       jmp handle_exception
-       vector = vector + 1
-       .endr
-.endm
-
-.global idt_handler_code
-idt_handler_code:
-       HANDLERS has_error=0 from=0  to=7
-       HANDLERS has_error=1 from=8  to=8
-       HANDLERS has_error=0 from=9  to=9
-       HANDLERS has_error=1 from=10 to=14
-       HANDLERS has_error=0 from=15 to=16
-       HANDLERS has_error=1 from=17 to=17
-       HANDLERS has_error=0 from=18 to=255
-
-.section        .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
deleted file mode 100644 (file)
index 15bc8cd..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Hyper-V specific functions.
- *
- * Copyright (C) 2021, Red Hat Inc.
- */
-#include <stdint.h>
-#include "processor.h"
-#include "hyperv.h"
-
-const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
-{
-       static struct kvm_cpuid2 *cpuid;
-       int kvm_fd;
-
-       if (cpuid)
-               return cpuid;
-
-       cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-       close(kvm_fd);
-       return cpuid;
-}
-
-void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       static struct kvm_cpuid2 *cpuid_full;
-       const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
-       int i, nent = 0;
-
-       if (!cpuid_full) {
-               cpuid_sys = kvm_get_supported_cpuid();
-               cpuid_hv = kvm_get_supported_hv_cpuid();
-
-               cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
-               if (!cpuid_full) {
-                       perror("malloc");
-                       abort();
-               }
-
-               /* Need to skip KVM CPUID leaves 0x400000xx */
-               for (i = 0; i < cpuid_sys->nent; i++) {
-                       if (cpuid_sys->entries[i].function >= 0x40000000 &&
-                           cpuid_sys->entries[i].function < 0x40000100)
-                               continue;
-                       cpuid_full->entries[nent] = cpuid_sys->entries[i];
-                       nent++;
-               }
-
-               memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
-                      cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
-               cpuid_full->nent = nent + cpuid_hv->nent;
-       }
-
-       vcpu_init_cpuid(vcpu, cpuid_full);
-}
-
-const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-
-       vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-       return cpuid;
-}
-
-bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
-{
-       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID))
-               return false;
-
-       return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature);
-}
-
-struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
-                                                      vm_vaddr_t *p_hv_pages_gva)
-{
-       vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
-       struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
-
-       /* Setup of a region of guest memory for the VP Assist page. */
-       hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
-       hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
-       hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
-
-       /* Setup of a region of guest memory for the partition assist page. */
-       hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
-       hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
-       hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
-
-       /* Setup of a region of guest memory for the enlightened VMCS. */
-       hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
-       hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
-       hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
-
-       *p_hv_pages_gva = hv_pages_gva;
-       return hv;
-}
-
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
-{
-       uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
-               HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-       wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
-       current_vp_assist = vp_assist;
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
deleted file mode 100644 (file)
index d61e623..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * x86_64-specific extensions to memstress.c.
- *
- * Copyright (C) 2022, Google, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "memstress.h"
-#include "processor.h"
-#include "vmx.h"
-
-void memstress_l2_guest_code(uint64_t vcpu_id)
-{
-       memstress_guest_code(vcpu_id);
-       vmcall();
-}
-
-extern char memstress_l2_guest_entry[];
-__asm__(
-"memstress_l2_guest_entry:"
-"      mov (%rsp), %rdi;"
-"      call memstress_l2_guest_code;"
-"      ud2;"
-);
-
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       unsigned long *rsp;
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-       GUEST_ASSERT(ept_1g_pages_supported());
-
-       rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
-       *rsp = vcpu_id;
-       prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_DONE();
-}
-
-uint64_t memstress_nested_pages(int nr_vcpus)
-{
-       /*
-        * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
-        * pages and 4-level paging, plus a few pages per-vCPU for data
-        * structures such as the VMCS.
-        */
-       return 513 + 10 * nr_vcpus;
-}
-
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
-       uint64_t start, end;
-
-       prepare_eptp(vmx, vm, 0);
-
-       /*
-        * Identity map the first 4G and the test region with 1G pages so that
-        * KVM can shadow the EPT12 with the maximum huge page size supported
-        * by the backing source.
-        */
-       nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
-
-       start = align_down(memstress_args.gpa, PG_SIZE_1G);
-       end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
-       nested_identity_map_1g(vmx, vm, start, end - start);
-}
-
-void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
-{
-       struct vmx_pages *vmx, *vmx0 = NULL;
-       struct kvm_regs regs;
-       vm_vaddr_t vmx_gva;
-       int vcpu_id;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_cpu_has_ept());
-
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-               vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
-               if (vcpu_id == 0) {
-                       memstress_setup_ept(vmx, vm);
-                       vmx0 = vmx;
-               } else {
-                       /* Share the same EPT table across all vCPUs. */
-                       vmx->eptp = vmx0->eptp;
-                       vmx->eptp_hva = vmx0->eptp_hva;
-                       vmx->eptp_gpa = vmx0->eptp_gpa;
-               }
-
-               /*
-                * Override the vCPU to run memstress_l1_guest_code() which will
-                * bounce it into L2 before calling memstress_guest_code().
-                */
-               vcpu_regs_get(vcpus[vcpu_id], &regs);
-               regs.rip = (unsigned long) memstress_l1_guest_code;
-               vcpu_regs_set(vcpus[vcpu_id], &regs);
-               vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
-       }
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
deleted file mode 100644 (file)
index f31f042..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-
-#include <stdint.h>
-
-#include <linux/kernel.h>
-
-#include "kvm_util.h"
-#include "pmu.h"
-
-const uint64_t intel_pmu_arch_events[] = {
-       INTEL_ARCH_CPU_CYCLES,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       INTEL_ARCH_REFERENCE_CYCLES,
-       INTEL_ARCH_LLC_REFERENCES,
-       INTEL_ARCH_LLC_MISSES,
-       INTEL_ARCH_BRANCHES_RETIRED,
-       INTEL_ARCH_BRANCHES_MISPREDICTED,
-       INTEL_ARCH_TOPDOWN_SLOTS,
-};
-kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
-
-const uint64_t amd_pmu_zen_events[] = {
-       AMD_ZEN_CORE_CYCLES,
-       AMD_ZEN_INSTRUCTIONS_RETIRED,
-       AMD_ZEN_BRANCHES_RETIRED,
-       AMD_ZEN_BRANCHES_MISPREDICTED,
-};
-kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
deleted file mode 100644 (file)
index 636b29b..0000000
+++ /dev/null
@@ -1,1295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include "linux/bitmap.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-
-#ifndef NUM_INTERRUPTS
-#define NUM_INTERRUPTS 256
-#endif
-
-#define KERNEL_CS      0x8
-#define KERNEL_DS      0x10
-#define KERNEL_TSS     0x18
-
-vm_vaddr_t exception_handlers;
-bool host_cpu_is_amd;
-bool host_cpu_is_intel;
-bool is_forced_emulation_enabled;
-uint64_t guest_tsc_khz;
-
-static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
-{
-       fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
-               "rcx: 0x%.16llx rdx: 0x%.16llx\n",
-               indent, "",
-               regs->rax, regs->rbx, regs->rcx, regs->rdx);
-       fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
-               "rsp: 0x%.16llx rbp: 0x%.16llx\n",
-               indent, "",
-               regs->rsi, regs->rdi, regs->rsp, regs->rbp);
-       fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
-               "r10: 0x%.16llx r11: 0x%.16llx\n",
-               indent, "",
-               regs->r8, regs->r9, regs->r10, regs->r11);
-       fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
-               "r14: 0x%.16llx r15: 0x%.16llx\n",
-               indent, "",
-               regs->r12, regs->r13, regs->r14, regs->r15);
-       fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
-               indent, "",
-               regs->rip, regs->rflags);
-}
-
-static void segment_dump(FILE *stream, struct kvm_segment *segment,
-                        uint8_t indent)
-{
-       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
-               "selector: 0x%.4x type: 0x%.2x\n",
-               indent, "", segment->base, segment->limit,
-               segment->selector, segment->type);
-       fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
-               "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
-               indent, "", segment->present, segment->dpl,
-               segment->db, segment->s, segment->l);
-       fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
-               "unusable: 0x%.2x padding: 0x%.2x\n",
-               indent, "", segment->g, segment->avl,
-               segment->unusable, segment->padding);
-}
-
-static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
-                       uint8_t indent)
-{
-       fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
-               "padding: 0x%.4x 0x%.4x 0x%.4x\n",
-               indent, "", dtable->base, dtable->limit,
-               dtable->padding[0], dtable->padding[1], dtable->padding[2]);
-}
-
-static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
-{
-       unsigned int i;
-
-       fprintf(stream, "%*scs:\n", indent, "");
-       segment_dump(stream, &sregs->cs, indent + 2);
-       fprintf(stream, "%*sds:\n", indent, "");
-       segment_dump(stream, &sregs->ds, indent + 2);
-       fprintf(stream, "%*ses:\n", indent, "");
-       segment_dump(stream, &sregs->es, indent + 2);
-       fprintf(stream, "%*sfs:\n", indent, "");
-       segment_dump(stream, &sregs->fs, indent + 2);
-       fprintf(stream, "%*sgs:\n", indent, "");
-       segment_dump(stream, &sregs->gs, indent + 2);
-       fprintf(stream, "%*sss:\n", indent, "");
-       segment_dump(stream, &sregs->ss, indent + 2);
-       fprintf(stream, "%*str:\n", indent, "");
-       segment_dump(stream, &sregs->tr, indent + 2);
-       fprintf(stream, "%*sldt:\n", indent, "");
-       segment_dump(stream, &sregs->ldt, indent + 2);
-
-       fprintf(stream, "%*sgdt:\n", indent, "");
-       dtable_dump(stream, &sregs->gdt, indent + 2);
-       fprintf(stream, "%*sidt:\n", indent, "");
-       dtable_dump(stream, &sregs->idt, indent + 2);
-
-       fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
-               "cr3: 0x%.16llx cr4: 0x%.16llx\n",
-               indent, "",
-               sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
-       fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
-               "apic_base: 0x%.16llx\n",
-               indent, "",
-               sregs->cr8, sregs->efer, sregs->apic_base);
-
-       fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
-       for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
-               fprintf(stream, "%*s%.16llx\n", indent + 2, "",
-                       sregs->interrupt_bitmap[i]);
-       }
-}
-
-bool kvm_is_tdp_enabled(void)
-{
-       if (host_cpu_is_intel)
-               return get_kvm_intel_param_bool("ept");
-       else
-               return get_kvm_amd_param_bool("npt");
-}
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm)
-{
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       /* If needed, create page map l4 table. */
-       if (!vm->pgd_created) {
-               vm->pgd = vm_alloc_page_table(vm);
-               vm->pgd_created = true;
-       }
-}
-
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
-                         uint64_t vaddr, int level)
-{
-       uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
-       uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
-       int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-
-       TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
-                   "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
-                   level + 1, vaddr);
-
-       return &page_table[index];
-}
-
-static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
-                                      uint64_t *parent_pte,
-                                      uint64_t vaddr,
-                                      uint64_t paddr,
-                                      int current_level,
-                                      int target_level)
-{
-       uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
-
-       paddr = vm_untag_gpa(vm, paddr);
-
-       if (!(*pte & PTE_PRESENT_MASK)) {
-               *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
-               if (current_level == target_level)
-                       *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-               else
-                       *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
-       } else {
-               /*
-                * Entry already present.  Assert that the caller doesn't want
-                * a hugepage at this level, and that there isn't a hugepage at
-                * this level.
-                */
-               TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, vaddr: 0x%lx",
-                           current_level, vaddr);
-               TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
-                           "Cannot create page table at level: %u, vaddr: 0x%lx",
-                           current_level, vaddr);
-       }
-       return pte;
-}
-
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
-{
-       const uint64_t pg_size = PG_LEVEL_SIZE(level);
-       uint64_t *pml4e, *pdpe, *pde;
-       uint64_t *pte;
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
-                   "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       TEST_ASSERT((vaddr % pg_size) == 0,
-                   "Virtual address not aligned,\n"
-                   "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
-                   "Invalid virtual address, vaddr: 0x%lx", vaddr);
-       TEST_ASSERT((paddr % pg_size) == 0,
-                   "Physical address not aligned,\n"
-                   "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Physical address beyond maximum supported,\n"
-                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   paddr, vm->max_gfn, vm->page_size);
-       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
-                   "Unexpected bits in paddr: %lx", paddr);
-
-       /*
-        * Allocate upper level page tables, if not already present.  Return
-        * early if a hugepage was created.
-        */
-       pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
-       if (*pml4e & PTE_LARGE_MASK)
-               return;
-
-       pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
-       if (*pdpe & PTE_LARGE_MASK)
-               return;
-
-       pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
-       if (*pde & PTE_LARGE_MASK)
-               return;
-
-       /* Fill in page table entry. */
-       pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
-       TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
-                   "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
-       *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-
-       /*
-        * Neither SEV nor TDX supports shared page tables, so only the final
-        * leaf PTE needs to have the C/S-bit set manually.
-        */
-       if (vm_is_gpa_protected(vm, paddr))
-               *pte |= vm->arch.c_bit;
-       else
-               *pte |= vm->arch.s_bit;
-}
-
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
-       __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
-}
-
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-                   uint64_t nr_bytes, int level)
-{
-       uint64_t pg_size = PG_LEVEL_SIZE(level);
-       uint64_t nr_pages = nr_bytes / pg_size;
-       int i;
-
-       TEST_ASSERT(nr_bytes % pg_size == 0,
-                   "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
-                   nr_bytes, pg_size);
-
-       for (i = 0; i < nr_pages; i++) {
-               __virt_pg_map(vm, vaddr, paddr, level);
-
-               vaddr += pg_size;
-               paddr += pg_size;
-       }
-}
-
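For reference, a hedged sketch of how a test might use virt_map_level() above to back a range with 2M hugepages. The guest addresses are made up for illustration; a real test would use a GPA range it actually allocated or owns via a memslot.

static void map_2m_hugepages_example(struct kvm_vm *vm)
{
        /* Hypothetical, 2M-aligned guest virtual/physical addresses. */
        const uint64_t gva = 0x40000000;
        const uint64_t gpa = 0x40000000;
        const uint64_t size = 2 * PG_LEVEL_SIZE(PG_LEVEL_2M);

        /* Creates two 2M leaf PTEs; asserts if the range isn't 2M-aligned. */
        virt_map_level(vm, gva, gpa, size, PG_LEVEL_2M);
}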
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
-{
-       if (*pte & PTE_LARGE_MASK) {
-               TEST_ASSERT(*level == PG_LEVEL_NONE ||
-                           *level == current_level,
-                           "Unexpected hugepage at level %d", current_level);
-               *level = current_level;
-       }
-
-       return *level == current_level;
-}
-
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
-                                   int *level)
-{
-       uint64_t *pml4e, *pdpe, *pde;
-
-       TEST_ASSERT(!vm->arch.is_pt_protected,
-                   "Walking page tables of protected guests is impossible");
-
-       TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
-                   "Invalid PG_LEVEL_* '%d'", *level);
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-               "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-       TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-               (vaddr >> vm->page_shift)),
-               "Invalid virtual address, vaddr: 0x%lx",
-               vaddr);
-       /*
-        * Based on the mode check above there are 48 bits in the vaddr, so
-        * shift 16 to sign extend the last bit (bit-47),
-        * shift 16 to sign-extend the last bit (bit-47).
-       TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
-               "Canonical check failed.  The virtual address is invalid.");
-
-       pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
-       if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
-               return pml4e;
-
-       pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
-       if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
-               return pdpe;
-
-       pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
-       if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
-               return pde;
-
-       return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
-}
-
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
-{
-       int level = PG_LEVEL_4K;
-
-       return __vm_get_page_table_entry(vm, vaddr, &level);
-}
-
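A hedged example of what the walker above is typically used for: fetching the leaf PTE of an already-mapped 4K page and clearing its writable bit, e.g. to provoke a write-protection fault. The helper name is hypothetical, and the guest may also need to flush its TLB before the change is observed.

static void make_gva_read_only_example(struct kvm_vm *vm, uint64_t gva)
{
        uint64_t *pte = vm_get_page_table_entry(vm, gva);

        /* Host-side edit of the guest's own page tables. */
        *pte &= ~PTE_WRITABLE_MASK;
}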
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
-       uint64_t *pml4e, *pml4e_start;
-       uint64_t *pdpe, *pdpe_start;
-       uint64_t *pde, *pde_start;
-       uint64_t *pte, *pte_start;
-
-       if (!vm->pgd_created)
-               return;
-
-       fprintf(stream, "%*s                                          "
-               "                no\n", indent, "");
-       fprintf(stream, "%*s      index hvaddr         gpaddr         "
-               "addr         w exec dirty\n",
-               indent, "");
-       pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
-       for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
-               pml4e = &pml4e_start[n1];
-               if (!(*pml4e & PTE_PRESENT_MASK))
-                       continue;
-               fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
-                       " %u\n",
-                       indent, "",
-                       pml4e - pml4e_start, pml4e,
-                       addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
-                       !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
-
-               pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
-               for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
-                       pdpe = &pdpe_start[n2];
-                       if (!(*pdpe & PTE_PRESENT_MASK))
-                               continue;
-                       fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
-                               "%u  %u\n",
-                               indent, "",
-                               pdpe - pdpe_start, pdpe,
-                               addr_hva2gpa(vm, pdpe),
-                               PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
-                               !!(*pdpe & PTE_NX_MASK));
-
-                       pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
-                       for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
-                               pde = &pde_start[n3];
-                               if (!(*pde & PTE_PRESENT_MASK))
-                                       continue;
-                               fprintf(stream, "%*spde   0x%-3zx %p "
-                                       "0x%-12lx 0x%-10llx %u  %u\n",
-                                       indent, "", pde - pde_start, pde,
-                                       addr_hva2gpa(vm, pde),
-                                       PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
-                                       !!(*pde & PTE_NX_MASK));
-
-                               pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
-                               for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
-                                       pte = &pte_start[n4];
-                                       if (!(*pte & PTE_PRESENT_MASK))
-                                               continue;
-                                       fprintf(stream, "%*spte   0x%-3zx %p "
-                                               "0x%-12lx 0x%-10llx %u  %u "
-                                               "    %u    0x%-10lx\n",
-                                               indent, "",
-                                               pte - pte_start, pte,
-                                               addr_hva2gpa(vm, pte),
-                                               PTE_GET_PFN(*pte),
-                                               !!(*pte & PTE_WRITABLE_MASK),
-                                               !!(*pte & PTE_NX_MASK),
-                                               !!(*pte & PTE_DIRTY_MASK),
-                                               ((uint64_t) n1 << 27)
-                                                       | ((uint64_t) n2 << 18)
-                                                       | ((uint64_t) n3 << 9)
-                                                       | ((uint64_t) n4));
-                               }
-                       }
-               }
-       }
-}
-
-/*
- * Set Unusable Segment
- *
- * Input Args: None
- *
- * Output Args:
- *   segp - Pointer to segment register
- *
- * Return: None
- *
- * Sets the segment register pointed to by @segp to an unusable state.
- */
-static void kvm_seg_set_unusable(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->unusable = true;
-}
-
-static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
-{
-       void *gdt = addr_gva2hva(vm, vm->arch.gdt);
-       struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
-
-       desc->limit0 = segp->limit & 0xFFFF;
-       desc->base0 = segp->base & 0xFFFF;
-       desc->base1 = segp->base >> 16;
-       desc->type = segp->type;
-       desc->s = segp->s;
-       desc->dpl = segp->dpl;
-       desc->p = segp->present;
-       desc->limit1 = segp->limit >> 16;
-       desc->avl = segp->avl;
-       desc->l = segp->l;
-       desc->db = segp->db;
-       desc->g = segp->g;
-       desc->base2 = segp->base >> 24;
-       if (!segp->s)
-               desc->base3 = segp->base >> 32;
-}
-
-static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->selector = KERNEL_CS;
-       segp->limit = 0xFFFFFFFFu;
-       segp->s = 0x1; /* kTypeCodeData */
-       segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
-                                         * | kFlagCodeReadable
-                                         */
-       segp->g = true;
-       segp->l = true;
-       segp->present = 1;
-}
-
-static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->selector = KERNEL_DS;
-       segp->limit = 0xFFFFFFFFu;
-       segp->s = 0x1; /* kTypeCodeData */
-       segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
-                                         * | kFlagDataWritable
-                                         */
-       segp->g = true;
-       segp->present = true;
-}
-
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
-       int level = PG_LEVEL_NONE;
-       uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
-
-       TEST_ASSERT(*pte & PTE_PRESENT_MASK,
-                   "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
-
-       /*
-        * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
-        * address bits to be zero.
-        */
-       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
-}
-
-static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
-{
-       memset(segp, 0, sizeof(*segp));
-       segp->base = base;
-       segp->limit = 0x67;
-       segp->selector = KERNEL_TSS;
-       segp->type = 0xb;
-       segp->present = 1;
-}
-
-static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct kvm_sregs sregs;
-
-       TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
-
-       /* Set mode specific system register values. */
-       vcpu_sregs_get(vcpu, &sregs);
-
-       sregs.idt.base = vm->arch.idt;
-       sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
-       sregs.gdt.base = vm->arch.gdt;
-       sregs.gdt.limit = getpagesize() - 1;
-
-       sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
-       sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
-       if (kvm_cpu_has(X86_FEATURE_XSAVE))
-               sregs.cr4 |= X86_CR4_OSXSAVE;
-       sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
-
-       kvm_seg_set_unusable(&sregs.ldt);
-       kvm_seg_set_kernel_code_64bit(&sregs.cs);
-       kvm_seg_set_kernel_data_64bit(&sregs.ds);
-       kvm_seg_set_kernel_data_64bit(&sregs.es);
-       kvm_seg_set_kernel_data_64bit(&sregs.gs);
-       kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
-
-       sregs.cr3 = vm->pgd;
-       vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       struct kvm_xcrs xcrs = {
-               .nr_xcrs = 1,
-               .xcrs[0].xcr = 0,
-               .xcrs[0].value = kvm_cpu_supported_xcr0(),
-       };
-
-       if (!kvm_cpu_has(X86_FEATURE_XSAVE))
-               return;
-
-       vcpu_xcrs_set(vcpu, &xcrs);
-}
-
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
-                         int dpl, unsigned short selector)
-{
-       struct idt_entry *base =
-               (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
-       struct idt_entry *e = &base[vector];
-
-       memset(e, 0, sizeof(*e));
-       e->offset0 = addr;
-       e->selector = selector;
-       e->ist = 0;
-       e->type = 14;
-       e->dpl = dpl;
-       e->p = 1;
-       e->offset1 = addr >> 16;
-       e->offset2 = addr >> 32;
-}
-
-static bool kvm_fixup_exception(struct ex_regs *regs)
-{
-       if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
-               return false;
-
-       if (regs->vector == DE_VECTOR)
-               return false;
-
-       regs->rip = regs->r11;
-       regs->r9 = regs->vector;
-       regs->r10 = regs->error_code;
-       return true;
-}
-
-void route_exception(struct ex_regs *regs)
-{
-       typedef void(*handler)(struct ex_regs *);
-       handler *handlers = (handler *)exception_handlers;
-
-       if (handlers && handlers[regs->vector]) {
-               handlers[regs->vector](regs);
-               return;
-       }
-
-       if (kvm_fixup_exception(regs))
-               return;
-
-       GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'",
-                  regs->vector, regs->rip);
-}
-
-static void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
-       extern void *idt_handlers;
-       struct kvm_segment seg;
-       int i;
-
-       vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-       vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
-       /* Handlers have the same address in both address spaces. */
-       for (i = 0; i < NUM_INTERRUPTS; i++)
-               set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
-
-       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-
-       kvm_seg_set_kernel_code_64bit(&seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-
-       kvm_seg_set_kernel_data_64bit(&seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-
-       kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
-       kvm_seg_fill_gdt_64bit(vm, &seg);
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-                              void (*handler)(struct ex_regs *))
-{
-       vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
-       handlers[vector] = (vm_vaddr_t)handler;
-}
-
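A hedged usage sketch for the handler plumbing above: the guest-side handler is invoked from route_exception() with the saved register state, and the host registers it per vector. The two-byte RIP adjustment assumes the faulting instruction is UD2, which is an assumption of this example, not something enforced by the library.

/* Guest: skip a UD2 instruction instead of failing the test. */
static void guest_ud_handler_example(struct ex_regs *regs)
{
        regs->rip += 2;         /* UD2 is two bytes */
}

/* Host: call after the VM (and thus the IDT/GDT) has been created. */
static void install_ud_handler_example(struct kvm_vm *vm)
{
        vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler_example);
}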
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT)
-               REPORT_GUEST_ASSERT(uc);
-}
-
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
-{
-       int r;
-
-       TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
-                   "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
-
-       vm_create_irqchip(vm);
-       vm_init_descriptor_tables(vm);
-
-       sync_global_to_guest(vm, host_cpu_is_intel);
-       sync_global_to_guest(vm, host_cpu_is_amd);
-       sync_global_to_guest(vm, is_forced_emulation_enabled);
-
-       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
-               struct kvm_sev_init init = { 0 };
-
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-
-       r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
-       TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
-       guest_tsc_khz = r;
-       sync_global_to_guest(vm, guest_tsc_khz);
-}
-
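To show why guest_tsc_khz is synced into the guest, here is a rough sketch of a TSC-based delay in the spirit of the selftests' udelay() helper; treat it as illustrative rather than the actual header implementation.

static inline void guest_udelay_example(uint64_t usec)
{
        uint64_t cycles = guest_tsc_khz * usec / 1000;
        uint64_t start = rdtsc();

        /* Busy-wait until the requested number of TSC cycles has elapsed. */
        while (rdtsc() - start < cycles)
                cpu_relax();
}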
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
-{
-       struct kvm_regs regs;
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip = (unsigned long) guest_code;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       struct kvm_mp_state mp_state;
-       struct kvm_regs regs;
-       vm_vaddr_t stack_vaddr;
-       struct kvm_vcpu *vcpu;
-
-       stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
-                                      DEFAULT_GUEST_STACK_VADDR_MIN,
-                                      MEM_REGION_DATA);
-
-       stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
-
-       /*
-        * Align stack to match calling sequence requirements in section "The
-        * Stack Frame" of the System V ABI AMD64 Architecture Processor
-        * Supplement, which requires the value (%rsp + 8) to be a multiple of
-        * 16 when control is transferred to the function entry point.
-        *
-        * If this code is ever used to launch a vCPU with 32-bit entry point it
-        * may need to subtract 4 bytes instead of 8 bytes.
-        */
-       TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
-                   "__vm_vaddr_alloc() did not provide a page-aligned address");
-       stack_vaddr -= 8;
-
-       vcpu = __vm_vcpu_add(vm, vcpu_id);
-       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
-       vcpu_init_sregs(vm, vcpu);
-       vcpu_init_xcrs(vm, vcpu);
-
-       /* Setup guest general purpose registers */
-       vcpu_regs_get(vcpu, &regs);
-       regs.rflags = regs.rflags | 0x2;
-       regs.rsp = stack_vaddr;
-       vcpu_regs_set(vcpu, &regs);
-
-       /* Setup the MP state */
-       mp_state.mp_state = 0;
-       vcpu_mp_state_set(vcpu, &mp_state);
-
-       /*
-        * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime"
-        * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are
-        * reflected into selftests' vCPU CPUID cache, i.e. so that the cache
-        * is consistent with vCPU state.
-        */
-       vcpu_get_cpuid(vcpu);
-       return vcpu;
-}
-
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
-{
-       struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
-
-       vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
-
-       return vcpu;
-}
-
-void vcpu_arch_free(struct kvm_vcpu *vcpu)
-{
-       if (vcpu->cpuid)
-               free(vcpu->cpuid);
-}
-
-/* Do not use kvm_supported_cpuid directly except for validity checks. */
-static void *kvm_supported_cpuid;
-
-const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
-       int kvm_fd;
-
-       if (kvm_supported_cpuid)
-               return kvm_supported_cpuid;
-
-       kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
-                 (struct kvm_cpuid2 *)kvm_supported_cpuid);
-
-       close(kvm_fd);
-       return kvm_supported_cpuid;
-}
-
-static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
-                             uint32_t function, uint32_t index,
-                             uint8_t reg, uint8_t lo, uint8_t hi)
-{
-       const struct kvm_cpuid_entry2 *entry;
-       int i;
-
-       for (i = 0; i < cpuid->nent; i++) {
-               entry = &cpuid->entries[i];
-
-               /*
-                * The output registers in kvm_cpuid_entry2 are in alphabetical
-                * order, but kvm_x86_cpu_feature matches that mess, so yay
-                * pointer shenanigans!
-                */
-               if (entry->function == function && entry->index == index)
-                       return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
-       }
-
-       return 0;
-}
-
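For reference, a minimal example of what ultimately lands in __kvm_cpu_has(): X86_FEATURE_XSAVE encodes CPUID.0x1:ECX[26], and the contiguous eax..edx layout of struct kvm_cpuid_entry2 is what makes the (&entry->eax)[reg] indexing work.

static void report_xsave_example(void)
{
        /* kvm_cpu_has() checks KVM's supported CPUID via kvm_cpuid_has(). */
        if (kvm_cpu_has(X86_FEATURE_XSAVE))
                pr_info("KVM supports XSAVE for guests\n");
}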
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
-                  struct kvm_x86_cpu_feature feature)
-{
-       return __kvm_cpu_has(cpuid, feature.function, feature.index,
-                            feature.reg, feature.bit, feature.bit);
-}
-
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
-                           struct kvm_x86_cpu_property property)
-{
-       return __kvm_cpu_has(cpuid, property.function, property.index,
-                            property.reg, property.lo_bit, property.hi_bit);
-}
-
-uint64_t kvm_get_feature_msr(uint64_t msr_index)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-       int r, kvm_fd;
-
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
-       TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
-
-       close(kvm_fd);
-       return buffer.entry.data;
-}
-
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
-{
-       int kvm_fd;
-       u64 bitmask;
-       long rc;
-       struct kvm_device_attr attr = {
-               .group = 0,
-               .attr = KVM_X86_XCOMP_GUEST_SUPP,
-               .addr = (unsigned long) &bitmask,
-       };
-
-       TEST_ASSERT(!kvm_supported_cpuid,
-                   "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
-
-       TEST_ASSERT(is_power_of_2(xfeature),
-                   "Dynamic XFeatures must be enabled one at a time");
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
-       close(kvm_fd);
-
-       if (rc == -1 && (errno == ENXIO || errno == EINVAL))
-               __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
-
-       TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
-
-       __TEST_REQUIRE(bitmask & xfeature,
-                      "Required XSAVE feature '%s' not supported", name);
-
-       TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
-
-       rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
-       TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
-       TEST_ASSERT(bitmask & xfeature,
-                   "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
-                   name, xfeature, bitmask);
-}
-
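Typical usage, sketched with the caveat that the exact feature-mask name is borrowed from memory of the AMX test: dynamically-enabled XSAVE features must be requested before kvm_get_supported_cpuid() is first called, which is why the helper asserts that the CPUID cache is still empty.

/* e.g. at the top of main(), before any VM or vCPU is created. */
vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);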
-void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
-{
-       TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
-
-       /* Allow overriding the default CPUID. */
-       if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
-               free(vcpu->cpuid);
-               vcpu->cpuid = NULL;
-       }
-
-       if (!vcpu->cpuid)
-               vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
-
-       memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
-       vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
-                            struct kvm_x86_cpu_property property,
-                            uint32_t value)
-{
-       struct kvm_cpuid_entry2 *entry;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
-
-       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
-       (&entry->eax)[property.reg] |= value << property.lo_bit;
-
-       vcpu_set_cpuid(vcpu);
-
-       /* Sanity check that @value doesn't exceed the bounds in any way. */
-       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
-}
-
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
-{
-       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
-
-       entry->eax = 0;
-       entry->ebx = 0;
-       entry->ecx = 0;
-       entry->edx = 0;
-       vcpu_set_cpuid(vcpu);
-}
-
-void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
-                                    struct kvm_x86_cpu_feature feature,
-                                    bool set)
-{
-       struct kvm_cpuid_entry2 *entry;
-       u32 *reg;
-
-       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
-       reg = (&entry->eax) + feature.reg;
-
-       if (set)
-               *reg |= BIT(feature.bit);
-       else
-               *reg &= ~BIT(feature.bit);
-
-       vcpu_set_cpuid(vcpu);
-}
-
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-
-       vcpu_msrs_get(vcpu, &buffer.header);
-
-       return buffer.entry.data;
-}
-
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
-{
-       struct {
-               struct kvm_msrs header;
-               struct kvm_msr_entry entry;
-       } buffer = {};
-
-       memset(&buffer, 0, sizeof(buffer));
-       buffer.header.nmsrs = 1;
-       buffer.entry.index = msr_index;
-       buffer.entry.data = msr_value;
-
-       return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
-}
-
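A hedged sketch of how the single-entry kvm_msrs buffers above are used in practice, via the vcpu_get_msr()/vcpu_set_msr() wrappers; MSR_IA32_TSC is just an example index.

static void bump_guest_tsc_example(struct kvm_vcpu *vcpu)
{
        uint64_t tsc = vcpu_get_msr(vcpu, MSR_IA32_TSC);

        /* vcpu_set_msr() asserts that KVM accepted the write. */
        vcpu_set_msr(vcpu, MSR_IA32_TSC, tsc + 1000000);
}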
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
-{
-       va_list ap;
-       struct kvm_regs regs;
-
-       TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-                   "  num: %u",
-                   num);
-
-       va_start(ap, num);
-       vcpu_regs_get(vcpu, &regs);
-
-       if (num >= 1)
-               regs.rdi = va_arg(ap, uint64_t);
-
-       if (num >= 2)
-               regs.rsi = va_arg(ap, uint64_t);
-
-       if (num >= 3)
-               regs.rdx = va_arg(ap, uint64_t);
-
-       if (num >= 4)
-               regs.rcx = va_arg(ap, uint64_t);
-
-       if (num >= 5)
-               regs.r8 = va_arg(ap, uint64_t);
-
-       if (num >= 6)
-               regs.r9 = va_arg(ap, uint64_t);
-
-       vcpu_regs_set(vcpu, &regs);
-       va_end(ap);
-}
-
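A hedged sketch of the calling convention implied above: the first two arguments land in RDI and RSI, so a guest entry point declared with two uint64_t parameters receives exactly the values passed to vcpu_args_set(). The token value and iteration count are arbitrary.

static void guest_code_example(uint64_t token, uint64_t nr_iters)
{
        GUEST_ASSERT_EQ(token, 0x1234);

        for (uint64_t i = 0; i < nr_iters; i++)
                GUEST_SYNC(i);

        GUEST_DONE();
}

/* Host side, before the first KVM_RUN: */
static void set_guest_args_example(struct kvm_vcpu *vcpu)
{
        vcpu_args_set(vcpu, 2, 0x1234, 10);
}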
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
-{
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-
-       fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
-
-       fprintf(stream, "%*sregs:\n", indent + 2, "");
-       vcpu_regs_get(vcpu, &regs);
-       regs_dump(stream, &regs, indent + 4);
-
-       fprintf(stream, "%*ssregs:\n", indent + 2, "");
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs_dump(stream, &sregs, indent + 4);
-}
-
-static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
-{
-       struct kvm_msr_list *list;
-       struct kvm_msr_list nmsrs;
-       int kvm_fd, r;
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-
-       nmsrs.nmsrs = 0;
-       if (!feature_msrs)
-               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
-       else
-               r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
-
-       TEST_ASSERT(r == -1 && errno == E2BIG,
-                   "Expected -E2BIG, got rc: %i errno: %i (%s)",
-                   r, errno, strerror(errno));
-
-       list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
-       TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
-       list->nmsrs = nmsrs.nmsrs;
-
-       if (!feature_msrs)
-               kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
-       else
-               kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
-       close(kvm_fd);
-
-       TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
-                   "Number of MSRs in list changed, was %d, now %d",
-                   nmsrs.nmsrs, list->nmsrs);
-       return list;
-}
-
-const struct kvm_msr_list *kvm_get_msr_index_list(void)
-{
-       static const struct kvm_msr_list *list;
-
-       if (!list)
-               list = __kvm_get_msr_index_list(false);
-       return list;
-}
-
-const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
-{
-       static const struct kvm_msr_list *list;
-
-       if (!list)
-               list = __kvm_get_msr_index_list(true);
-       return list;
-}
-
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
-{
-       const struct kvm_msr_list *list = kvm_get_msr_index_list();
-       int i;
-
-       for (i = 0; i < list->nmsrs; ++i) {
-               if (list->indices[i] == msr_index)
-                       return true;
-       }
-
-       return false;
-}
-
-static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
-                                 struct kvm_x86_state *state)
-{
-       int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
-
-       if (size) {
-               state->xsave = malloc(size);
-               vcpu_xsave2_get(vcpu, state->xsave);
-       } else {
-               state->xsave = malloc(sizeof(struct kvm_xsave));
-               vcpu_xsave_get(vcpu, state->xsave);
-       }
-}
-
-struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
-{
-       const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
-       struct kvm_x86_state *state;
-       int i;
-
-       static int nested_size = -1;
-
-       if (nested_size == -1) {
-               nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
-               TEST_ASSERT(nested_size <= sizeof(state->nested_),
-                           "Nested state size too big, %i > %zi",
-                           nested_size, sizeof(state->nested_));
-       }
-
-       /*
-        * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
-        * guest state is consistent only after userspace re-enters the
-        * kernel with KVM_RUN.  Complete IO prior to migrating state
-        * to a new VM.
-        */
-       vcpu_run_complete_io(vcpu);
-
-       state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
-       TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
-
-       vcpu_events_get(vcpu, &state->events);
-       vcpu_mp_state_get(vcpu, &state->mp_state);
-       vcpu_regs_get(vcpu, &state->regs);
-       vcpu_save_xsave_state(vcpu, state);
-
-       if (kvm_has_cap(KVM_CAP_XCRS))
-               vcpu_xcrs_get(vcpu, &state->xcrs);
-
-       vcpu_sregs_get(vcpu, &state->sregs);
-
-       if (nested_size) {
-               state->nested.size = sizeof(state->nested_);
-
-               vcpu_nested_state_get(vcpu, &state->nested);
-               TEST_ASSERT(state->nested.size <= nested_size,
-                           "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
-                           state->nested.size, nested_size);
-       } else {
-               state->nested.size = 0;
-       }
-
-       state->msrs.nmsrs = msr_list->nmsrs;
-       for (i = 0; i < msr_list->nmsrs; i++)
-               state->msrs.entries[i].index = msr_list->indices[i];
-       vcpu_msrs_get(vcpu, &state->msrs);
-
-       vcpu_debugregs_get(vcpu, &state->debugregs);
-
-       return state;
-}
-
-void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
-{
-       vcpu_sregs_set(vcpu, &state->sregs);
-       vcpu_msrs_set(vcpu, &state->msrs);
-
-       if (kvm_has_cap(KVM_CAP_XCRS))
-               vcpu_xcrs_set(vcpu, &state->xcrs);
-
-       vcpu_xsave_set(vcpu,  state->xsave);
-       vcpu_events_set(vcpu, &state->events);
-       vcpu_mp_state_set(vcpu, &state->mp_state);
-       vcpu_debugregs_set(vcpu, &state->debugregs);
-       vcpu_regs_set(vcpu, &state->regs);
-
-       if (state->nested.size)
-               vcpu_nested_state_set(vcpu, &state->nested);
-}
-
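A hedged sketch of the save/restore cycle these two functions exist for, modeled on the x86 state test: dump the vCPU, rebuild the VM around the same guest memory, and replay the state. kvm_vm_release() and vm_recreate_with_one_vcpu() are the library helpers the selftests use for this flow.

static struct kvm_vcpu *migrate_vcpu_example(struct kvm_vm *vm,
                                             struct kvm_vcpu *vcpu)
{
        struct kvm_x86_state *state = vcpu_save_state(vcpu);

        /* Drop the old vCPU/VM fds but keep guest memory, then re-add. */
        kvm_vm_release(vm);
        vcpu = vm_recreate_with_one_vcpu(vm);

        vcpu_load_state(vcpu, state);
        kvm_x86_state_cleanup(state);

        return vcpu;
}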
-void kvm_x86_state_cleanup(struct kvm_x86_state *state)
-{
-       free(state->xsave);
-       free(state);
-}
-
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
-{
-       if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
-               *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
-               *va_bits = 32;
-       } else {
-               *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
-               *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
-       }
-}
-
-void kvm_init_vm_address_properties(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
-               vm->gpa_tag_mask = vm->arch.c_bit;
-       } else {
-               vm->arch.sev_fd = -1;
-       }
-}
-
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
-                                              uint32_t function, uint32_t index)
-{
-       int i;
-
-       for (i = 0; i < cpuid->nent; i++) {
-               if (cpuid->entries[i].function == function &&
-                   cpuid->entries[i].index == index)
-                       return &cpuid->entries[i];
-       }
-
-       TEST_FAIL("CPUID function 0x%x index 0x%x not found", function, index);
-
-       return NULL;
-}
-
-#define X86_HYPERCALL(inputs...)                                       \
-({                                                                     \
-       uint64_t r;                                                     \
-                                                                       \
-       asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"          \
-                    "jnz 1f\n\t"                                       \
-                    "vmcall\n\t"                                       \
-                    "jmp 2f\n\t"                                       \
-                    "1: vmmcall\n\t"                                   \
-                    "2:"                                               \
-                    : "=a"(r)                                          \
-                    : [use_vmmcall] "r" (host_cpu_is_amd), inputs);    \
-                                                                       \
-       r;                                                              \
-})
-
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
-                      uint64_t a3)
-{
-       return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
-}
-
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
-       return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
-}
-
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
-{
-       GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
-}
-
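A hedged guest-side sketch of the KVM hypercall ABI wrapped above: the number goes in RAX and the arguments in RBX/RCX/RDX/RSI, with the result returned in RAX. KVM_HC_SEND_IPI is only an example number; whether it succeeds depends on what the test and guest CPUID enable.

static uint64_t send_ipi_example(uint64_t dest_mask_lo, uint64_t dest_mask_hi,
                                 uint64_t min_apic_id, uint64_t icr)
{
        /* Uses VMCALL on Intel and VMMCALL on AMD, selected at runtime. */
        return kvm_hypercall(KVM_HC_SEND_IPI, dest_mask_lo, dest_mask_hi,
                             min_apic_id, icr);
}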
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
-{
-       const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
-       unsigned long ht_gfn, max_gfn, max_pfn;
-       uint8_t maxphyaddr, guest_maxphyaddr;
-
-       /*
-        * Use "guest MAXPHYADDR" from KVM if it's available.  Guest MAXPHYADDR
-        * enumerates the max _mappable_ GPA, which can be less than the raw
-        * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
-        * doesn't support 5-level TDP.
-        */
-       guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
-       guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
-       TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
-                   "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
-
-       max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
-
-       /* Avoid reserved HyperTransport region on AMD processors.  */
-       if (!host_cpu_is_amd)
-               return max_gfn;
-
-       /* On parts with <40 physical address bits, the area is fully hidden */
-       if (vm->pa_bits < 40)
-               return max_gfn;
-
-       /* Before family 17h, the HyperTransport area is just below 1T.  */
-       ht_gfn = (1 << 28) - num_ht_pages;
-       if (this_cpu_family() < 0x17)
-               goto done;
-
-       /*
-        * Otherwise it's at the top of the physical address space, possibly
-        * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
-        * the old conservative value if MAXPHYADDR is not enumerated.
-        */
-       if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
-               goto done;
-
-       maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
-       max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
-
-       if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
-               max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
-
-       ht_gfn = max_pfn - num_ht_pages;
-done:
-       return min(max_gfn, ht_gfn - 1);
-}
-
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
-       /* Ensure that a KVM vendor-specific module is loaded. */
-       if (vm == NULL)
-               close(open_kvm_dev_path_or_exit());
-
-       return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
-void kvm_selftest_arch_init(void)
-{
-       host_cpu_is_intel = this_cpu_is_intel();
-       host_cpu_is_amd = this_cpu_is_amd();
-       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
-}
-
-bool sys_clocksource_is_based_on_tsc(void)
-{
-       char *clk_name = sys_get_cur_clocksource();
-       bool ret = !strcmp(clk_name, "tsc\n") ||
-                  !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
-
-       free(clk_name);
-
-       return ret;
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
deleted file mode 100644 (file)
index e9535ee..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "sev.h"
-
-/*
- * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
- * -1 would then cause an underflow back to 2**64 - 1. This is expected and
- * correct.
- *
- * If the last range in the sparsebit is [x, y] and we try to iterate,
- * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
- * and find the first range, but that's correct because the condition
- * expression would cause us to quit the loop.
- */
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
-{
-       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
-       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
-       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
-       sparsebit_idx_t i, j;
-
-       if (!sparsebit_any_set(protected_phy_pages))
-               return;
-
-       sev_register_encrypted_memory(vm, region);
-
-       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
-               const uint64_t size = (j - i + 1) * vm->page_size;
-               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
-
-               sev_launch_update_data(vm, gpa_base + offset, size);
-       }
-}
-
-void sev_vm_init(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_DEFAULT_VM) {
-               assert(vm->arch.sev_fd == -1);
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
-       } else {
-               struct kvm_sev_init init = { 0 };
-               assert(vm->type == KVM_X86_SEV_VM);
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-}
-
-void sev_es_vm_init(struct kvm_vm *vm)
-{
-       if (vm->type == KVM_X86_DEFAULT_VM) {
-               assert(vm->arch.sev_fd == -1);
-               vm->arch.sev_fd = open_sev_dev_path_or_exit();
-               vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
-       } else {
-               struct kvm_sev_init init = { 0 };
-               assert(vm->type == KVM_X86_SEV_ES_VM);
-               vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
-       }
-}
-
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
-{
-       struct kvm_sev_launch_start launch_start = {
-               .policy = policy,
-       };
-       struct userspace_mem_region *region;
-       struct kvm_sev_guest_status status;
-       int ctr;
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-
-       TEST_ASSERT_EQ(status.policy, policy);
-       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
-
-       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
-               encrypt_region(vm, region);
-
-       if (policy & SEV_POLICY_ES)
-               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
-       vm->arch.is_pt_protected = true;
-}
-
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
-{
-       struct kvm_sev_launch_measure launch_measure;
-       struct kvm_sev_guest_status guest_status;
-
-       launch_measure.len = 256;
-       launch_measure.uaddr = (__u64)measurement;
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
-       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
-}
-
-void sev_vm_launch_finish(struct kvm_vm *vm)
-{
-       struct kvm_sev_guest_status status;
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
-                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
-                   "Unexpected guest state: %d", status.state);
-
-       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
-}
-
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
-                                          struct kvm_vcpu **cpu)
-{
-       struct vm_shape shape = {
-               .mode = VM_MODE_DEFAULT,
-               .type = type,
-       };
-       struct kvm_vm *vm;
-       struct kvm_vcpu *cpus[1];
-
-       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
-       *cpu = cpus[0];
-
-       return vm;
-}
-
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
-{
-       sev_vm_launch(vm, policy);
-
-       if (!measurement)
-               measurement = alloca(256);
-
-       sev_vm_launch_measure(vm, measurement);
-
-       sev_vm_launch_finish(vm);
-}
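Pulling the helpers in this file together, a hedged sketch of a minimal SEV test: create the VM with an SEV type, launch it (which encrypts and measures guest memory), and run the vCPU. The policy value is illustrative.

static void sev_smoke_example(void *guest_code)
{
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;

        vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_VM, guest_code, &vcpu);

        /* Encrypts all protected regions, measures, and finalizes launch. */
        vm_sev_launch(vm, SEV_POLICY_NO_DBG, NULL);

        vcpu_run(vcpu);
        kvm_vm_free(vm);
}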
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
deleted file mode 100644 (file)
index 5495a92..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
- * Helpers used for nested SVM testing
- * Largely inspired from KVM unit test svm.c
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-#define SEV_DEV_PATH "/dev/sev"
-
-struct gpr64_regs guest_regs;
-u64 rflags;
-
-/* Allocate memory regions for nested SVM tests.
- *
- * Input Args:
- *   vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- *   p_svm_gva - The guest virtual address for the struct svm_test_data.
- *
- * Return:
- *   Pointer to structure with the addresses of the SVM areas.
- */
-struct svm_test_data *
-vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
-{
-       vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
-       struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
-
-       svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
-       svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
-       svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
-
-       svm->save_area = (void *)vm_vaddr_alloc_page(vm);
-       svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
-       svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
-
-       svm->msr = (void *)vm_vaddr_alloc_page(vm);
-       svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
-       svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
-       memset(svm->msr_hva, 0, getpagesize());
-
-       *p_svm_gva = svm_gva;
-       return svm;
-}
-
-static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
-                        u64 base, u32 limit, u32 attr)
-{
-       seg->selector = selector;
-       seg->attrib = attr;
-       seg->limit = limit;
-       seg->base = base;
-}
-
-void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
-{
-       struct vmcb *vmcb = svm->vmcb;
-       uint64_t vmcb_gpa = svm->vmcb_gpa;
-       struct vmcb_save_area *save = &vmcb->save;
-       struct vmcb_control_area *ctrl = &vmcb->control;
-       u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-             | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
-       u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-               | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
-       uint64_t efer;
-
-       efer = rdmsr(MSR_EFER);
-       wrmsr(MSR_EFER, efer | EFER_SVME);
-       wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
-
-       memset(vmcb, 0, sizeof(*vmcb));
-       asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
-       vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
-       vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr);
-       vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0);
-       vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0);
-
-       ctrl->asid = 1;
-       save->cpl = 0;
-       save->efer = rdmsr(MSR_EFER);
-       asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory");
-       asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory");
-       asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory");
-       asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory");
-       asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory");
-       asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory");
-       save->g_pat = rdmsr(MSR_IA32_CR_PAT);
-       save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
-       ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
-                               (1ULL << INTERCEPT_VMMCALL);
-       ctrl->msrpm_base_pa = svm->msr_gpa;
-
-       vmcb->save.rip = (u64)guest_rip;
-       vmcb->save.rsp = (u64)guest_rsp;
-       guest_regs.rdi = (u64)svm;
-}
-
-/*
- * save/restore 64-bit general registers except rax, rip, rsp
- * which are directly handed through the VMCB guest processor state
- */
-#define SAVE_GPR_C                             \
-       "xchg %%rbx, guest_regs+0x20\n\t"       \
-       "xchg %%rcx, guest_regs+0x10\n\t"       \
-       "xchg %%rdx, guest_regs+0x18\n\t"       \
-       "xchg %%rbp, guest_regs+0x30\n\t"       \
-       "xchg %%rsi, guest_regs+0x38\n\t"       \
-       "xchg %%rdi, guest_regs+0x40\n\t"       \
-       "xchg %%r8,  guest_regs+0x48\n\t"       \
-       "xchg %%r9,  guest_regs+0x50\n\t"       \
-       "xchg %%r10, guest_regs+0x58\n\t"       \
-       "xchg %%r11, guest_regs+0x60\n\t"       \
-       "xchg %%r12, guest_regs+0x68\n\t"       \
-       "xchg %%r13, guest_regs+0x70\n\t"       \
-       "xchg %%r14, guest_regs+0x78\n\t"       \
-       "xchg %%r15, guest_regs+0x80\n\t"
-
-#define LOAD_GPR_C      SAVE_GPR_C
-
-/*
- * Selftests do not use interrupts, so clgi/sti/cli/stgi are dropped for
- * now.  The registers involved in LOAD/SAVE_GPR_C are ultimately left
- * unmodified, so they do not need to be in the clobber list.
- */
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
-{
-       asm volatile (
-               "vmload %[vmcb_gpa]\n\t"
-               "mov rflags, %%r15\n\t" // rflags
-               "mov %%r15, 0x170(%[vmcb])\n\t"
-               "mov guest_regs, %%r15\n\t"     // rax
-               "mov %%r15, 0x1f8(%[vmcb])\n\t"
-               LOAD_GPR_C
-               "vmrun %[vmcb_gpa]\n\t"
-               SAVE_GPR_C
-               "mov 0x170(%[vmcb]), %%r15\n\t" // rflags
-               "mov %%r15, rflags\n\t"
-               "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
-               "mov %%r15, guest_regs\n\t"
-               "vmsave %[vmcb_gpa]\n\t"
-               : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
-               : "r15", "memory");
-}
-
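A hedged sketch of the guest (L1) side that generic_svm_setup() and run_guest() are written for, in the style of the existing nested SVM tests: L2 does a VMMCALL, and L1 checks that the configured intercept fired.

#define L2_GUEST_STACK_SIZE 64

static void l2_guest_code_example(void)
{
        vmmcall();      /* exits to L1 with SVM_EXIT_VMMCALL */
}

static void l1_guest_code_example(struct svm_test_data *svm)
{
        static unsigned long l2_stack[L2_GUEST_STACK_SIZE];

        generic_svm_setup(svm, l2_guest_code_example,
                          &l2_stack[L2_GUEST_STACK_SIZE]);
        run_guest(svm->vmcb, svm->vmcb_gpa);

        GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
        GUEST_DONE();
}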
-/*
- * Open SEV_DEV_PATH if available, otherwise exit the entire program.
- *
- * Return:
- *   The opened file descriptor of /dev/sev.
- */
-int open_sev_dev_path_or_exit(void)
-{
-       return open_path_or_exit(SEV_DEV_PATH, 0);
-}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
deleted file mode 100644 (file)
index 1265cec..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucall support. A ucall is a "hypercall to userspace".
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include "kvm_util.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /*
-        * FIXME: Revert this hack (the entire commit that added it) once nVMX
-        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
-        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
-        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
-        * in particular is problematic) along with RDX and RDI (which are
-        * inputs), and clobber volatile GPRs. *sigh*
-        */
-#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
-       "rcx", "rsi", "r8", "r9", "r10", "r11"
-
-       asm volatile("push %%rbp\n\t"
-                    "push %%r15\n\t"
-                    "push %%r14\n\t"
-                    "push %%r13\n\t"
-                    "push %%r12\n\t"
-                    "push %%rbx\n\t"
-                    "push %%rdx\n\t"
-                    "push %%rdi\n\t"
-                    "in %[port], %%al\n\t"
-                    "pop %%rdi\n\t"
-                    "pop %%rdx\n\t"
-                    "pop %%rbx\n\t"
-                    "pop %%r12\n\t"
-                    "pop %%r13\n\t"
-                    "pop %%r14\n\t"
-                    "pop %%r15\n\t"
-                    "pop %%rbp\n\t"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
-                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
-}
-
-void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
-               struct kvm_regs regs;
-
-               vcpu_regs_get(vcpu, &regs);
-               return (void *)regs.rdi;
-       }
-       return NULL;
-}
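For completeness, a hedged sketch of the host-side loop that consumes these PIO exits: get_ucall() ends up in ucall_arch_get_ucall() above to recover the ucall structure from the guest's RDI.

static void run_guest_example(struct kvm_vcpu *vcpu)
{
        struct ucall uc;

        for (;;) {
                vcpu_run(vcpu);

                switch (get_ucall(vcpu, &uc)) {
                case UCALL_SYNC:
                        continue;       /* guest checkpoint, keep running */
                case UCALL_ABORT:
                        REPORT_GUEST_ASSERT(uc);
                        /* NOT REACHED */
                case UCALL_DONE:
                        return;
                default:
                        TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
                }
        }
}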
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
deleted file mode 100644 (file)
index d7ac122..0000000
+++ /dev/null
@@ -1,554 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#include <asm/msr-index.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PAGE_SHIFT_4K  12
-
-#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
-
-bool enable_evmcs;
-
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
-
-struct eptPageTableEntry {
-       uint64_t readable:1;
-       uint64_t writable:1;
-       uint64_t executable:1;
-       uint64_t memory_type:3;
-       uint64_t ignore_pat:1;
-       uint64_t page_size:1;
-       uint64_t accessed:1;
-       uint64_t dirty:1;
-       uint64_t ignored_11_10:2;
-       uint64_t address:40;
-       uint64_t ignored_62_52:11;
-       uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
-       uint64_t memory_type:3;
-       uint64_t page_walk_length:3;
-       uint64_t ad_enabled:1;
-       uint64_t reserved_11_07:5;
-       uint64_t address:40;
-       uint64_t reserved_63_52:12;
-};
-int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
-{
-       uint16_t evmcs_ver;
-
-       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
-                       (unsigned long)&evmcs_ver);
-
-       /* KVM should return supported EVMCS version range */
-       TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
-                   (evmcs_ver & 0xff) > 0,
-                   "Incorrect EVMCS version range: %x:%x",
-                   evmcs_ver & 0xff, evmcs_ver >> 8);
-
-       return evmcs_ver;
-}
-
-/* Allocate memory regions for nested VMX tests.
- *
- * Input Args:
- *   vm - The VM to allocate guest-virtual addresses in.
- *
- * Output Args:
- *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
- *
- * Return:
- *   Pointer to structure with the addresses of the VMX areas.
- */
-struct vmx_pages *
-vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
-{
-       vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
-       struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
-
-       /* Setup of a region of guest memory for the vmxon region. */
-       vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
-       vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
-
-       /* Setup of a region of guest memory for a vmcs. */
-       vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
-       vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
-
-       /* Setup of a region of guest memory for the MSR bitmap. */
-       vmx->msr = (void *)vm_vaddr_alloc_page(vm);
-       vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
-       vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
-       memset(vmx->msr_hva, 0, getpagesize());
-
-       /* Setup of a region of guest memory for the shadow VMCS. */
-       vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
-       vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
-       vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
-
-       /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
-       vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
-       vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
-       memset(vmx->vmread_hva, 0, getpagesize());
-
-       vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
-       vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
-       vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
-       memset(vmx->vmwrite_hva, 0, getpagesize());
-
-       *p_vmx_gva = vmx_gva;
-       return vmx;
-}
-
-bool prepare_for_vmx_operation(struct vmx_pages *vmx)
-{
-       uint64_t feature_control;
-       uint64_t required;
-       unsigned long cr0;
-       unsigned long cr4;
-
-       /*
-        * Ensure bits in CR0 and CR4 are valid in VMX operation:
-        * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
-        * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
-        */
-       __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
-       cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
-       cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
-       __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
-
-       __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
-       cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
-       cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
-       /* Enable VMX operation */
-       cr4 |= X86_CR4_VMXE;
-       __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
-
-       /*
-        * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
-        *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
-        *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
-        *    outside of SMX causes a #GP.
-        */
-       required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
-       required |= FEAT_CTL_LOCKED;
-       feature_control = rdmsr(MSR_IA32_FEAT_CTL);
-       if ((feature_control & required) != required)
-               wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
-
-       /* Enter VMX root operation. */
-       *(uint32_t *)(vmx->vmxon) = vmcs_revision();
-       if (vmxon(vmx->vmxon_gpa))
-               return false;
-
-       return true;
-}
-
-bool load_vmcs(struct vmx_pages *vmx)
-{
-       /* Load a VMCS. */
-       *(uint32_t *)(vmx->vmcs) = vmcs_revision();
-       if (vmclear(vmx->vmcs_gpa))
-               return false;
-
-       if (vmptrld(vmx->vmcs_gpa))
-               return false;
-
-       /* Setup shadow VMCS, do not load it yet. */
-       *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
-       if (vmclear(vmx->shadow_vmcs_gpa))
-               return false;
-
-       return true;
-}
-
-static bool ept_vpid_cap_supported(uint64_t mask)
-{
-       return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
-}
-
-bool ept_1g_pages_supported(void)
-{
-       return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
-}
-
-/*
- * Initialize the control fields to the most basic settings possible.
- */
-static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
-{
-       uint32_t sec_exec_ctl = 0;
-
-       vmwrite(VIRTUAL_PROCESSOR_ID, 0);
-       vmwrite(POSTED_INTR_NV, 0);
-
-       vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
-
-       if (vmx->eptp_gpa) {
-               uint64_t ept_paddr;
-               struct eptPageTablePointer eptp = {
-                       .memory_type = X86_MEMTYPE_WB,
-                       .page_walk_length = 3, /* + 1 */
-                       .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
-                       .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
-               };
-
-               memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
-               vmwrite(EPT_POINTER, ept_paddr);
-               sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
-       }
-
-       if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
-               vmwrite(CPU_BASED_VM_EXEC_CONTROL,
-                       rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
-       else {
-               vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
-               GUEST_ASSERT(!sec_exec_ctl);
-       }
-
-       vmwrite(EXCEPTION_BITMAP, 0);
-       vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
-       vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
-       vmwrite(CR3_TARGET_COUNT, 0);
-       vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
-               VM_EXIT_HOST_ADDR_SPACE_SIZE);    /* 64-bit host */
-       vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
-       vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
-       vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
-               VM_ENTRY_IA32E_MODE);             /* 64-bit guest */
-       vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
-       vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
-       vmwrite(TPR_THRESHOLD, 0);
-
-       vmwrite(CR0_GUEST_HOST_MASK, 0);
-       vmwrite(CR4_GUEST_HOST_MASK, 0);
-       vmwrite(CR0_READ_SHADOW, get_cr0());
-       vmwrite(CR4_READ_SHADOW, get_cr4());
-
-       vmwrite(MSR_BITMAP, vmx->msr_gpa);
-       vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
-       vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
-}
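
For reference, the EPTP value that init_vmcs_control_fields() builds above via the eptPageTablePointer bitfield is equivalent to composing the raw bits by hand. A sketch, assuming eptp_gpa fits within the 40-bit address field:

uint64_t eptp_raw = (vmx->eptp_gpa & ~0xfffull) |       /* bits 51:12: PML4 address */
                    (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS) ? (1ull << 6) : 0) |
                    (3ull << 3) |                        /* bits 5:3: page-walk length - 1 */
                    X86_MEMTYPE_WB;                      /* bits 2:0: memory type */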
-
-/*
- * Initialize the host state fields based on the current host state, with
- * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
- * or vmresume.
- */
-static inline void init_vmcs_host_state(void)
-{
-       uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
-
-       vmwrite(HOST_ES_SELECTOR, get_es());
-       vmwrite(HOST_CS_SELECTOR, get_cs());
-       vmwrite(HOST_SS_SELECTOR, get_ss());
-       vmwrite(HOST_DS_SELECTOR, get_ds());
-       vmwrite(HOST_FS_SELECTOR, get_fs());
-       vmwrite(HOST_GS_SELECTOR, get_gs());
-       vmwrite(HOST_TR_SELECTOR, get_tr());
-
-       if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
-               vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
-       if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
-               vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
-       if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
-               vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
-                       rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
-
-       vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
-
-       vmwrite(HOST_CR0, get_cr0());
-       vmwrite(HOST_CR3, get_cr3());
-       vmwrite(HOST_CR4, get_cr4());
-       vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
-       vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
-       vmwrite(HOST_TR_BASE,
-               get_desc64_base((struct desc64 *)(get_gdt().address + get_tr())));
-       vmwrite(HOST_GDTR_BASE, get_gdt().address);
-       vmwrite(HOST_IDTR_BASE, get_idt().address);
-       vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
-       vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
-}
-
-/*
- * Initialize the guest state fields essentially as a clone of
- * the host state fields. Some host state fields have fixed
- * values, and we set the corresponding guest state fields accordingly.
- */
-static inline void init_vmcs_guest_state(void *rip, void *rsp)
-{
-       vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
-       vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
-       vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
-       vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
-       vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
-       vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
-       vmwrite(GUEST_LDTR_SELECTOR, 0);
-       vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
-       vmwrite(GUEST_INTR_STATUS, 0);
-       vmwrite(GUEST_PML_INDEX, 0);
-
-       vmwrite(VMCS_LINK_POINTER, -1ll);
-       vmwrite(GUEST_IA32_DEBUGCTL, 0);
-       vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
-       vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
-       vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
-               vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
-
-       vmwrite(GUEST_ES_LIMIT, -1);
-       vmwrite(GUEST_CS_LIMIT, -1);
-       vmwrite(GUEST_SS_LIMIT, -1);
-       vmwrite(GUEST_DS_LIMIT, -1);
-       vmwrite(GUEST_FS_LIMIT, -1);
-       vmwrite(GUEST_GS_LIMIT, -1);
-       vmwrite(GUEST_LDTR_LIMIT, -1);
-       vmwrite(GUEST_TR_LIMIT, 0x67);
-       vmwrite(GUEST_GDTR_LIMIT, 0xffff);
-       vmwrite(GUEST_IDTR_LIMIT, 0xffff);
-       vmwrite(GUEST_ES_AR_BYTES,
-               vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
-       vmwrite(GUEST_SS_AR_BYTES, 0xc093);
-       vmwrite(GUEST_DS_AR_BYTES,
-               vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_FS_AR_BYTES,
-               vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_GS_AR_BYTES,
-               vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
-       vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
-       vmwrite(GUEST_TR_AR_BYTES, 0x8b);
-       vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
-       vmwrite(GUEST_ACTIVITY_STATE, 0);
-       vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
-       vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
-
-       vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
-       vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
-       vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
-       vmwrite(GUEST_ES_BASE, 0);
-       vmwrite(GUEST_CS_BASE, 0);
-       vmwrite(GUEST_SS_BASE, 0);
-       vmwrite(GUEST_DS_BASE, 0);
-       vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
-       vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
-       vmwrite(GUEST_LDTR_BASE, 0);
-       vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
-       vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
-       vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
-       vmwrite(GUEST_DR7, 0x400);
-       vmwrite(GUEST_RSP, (uint64_t)rsp);
-       vmwrite(GUEST_RIP, (uint64_t)rip);
-       vmwrite(GUEST_RFLAGS, 2);
-       vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
-       vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
-       vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
-}
-
-void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
-{
-       init_vmcs_control_fields(vmx);
-       init_vmcs_host_state();
-       init_vmcs_guest_state(guest_rip, guest_rsp);
-}
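
A minimal sketch of how a guest-side test typically strings these helpers together; l2_guest_code and the stack size are illustrative and not part of this file:

static void l1_guest_code(struct vmx_pages *vmx)
{
        unsigned long l2_stack[128];

        GUEST_ASSERT(prepare_for_vmx_operation(vmx));
        GUEST_ASSERT(load_vmcs(vmx));
        prepare_vmcs(vmx, l2_guest_code, &l2_stack[128]);

        /* Launch L2, expect it to exit back to L1 with a VMCALL. */
        GUEST_ASSERT(!vmlaunch());
        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
        GUEST_DONE();
}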
-
-static void nested_create_pte(struct kvm_vm *vm,
-                             struct eptPageTableEntry *pte,
-                             uint64_t nested_paddr,
-                             uint64_t paddr,
-                             int current_level,
-                             int target_level)
-{
-       if (!pte->readable) {
-               pte->writable = true;
-               pte->readable = true;
-               pte->executable = true;
-               pte->page_size = (current_level == target_level);
-               if (pte->page_size)
-                       pte->address = paddr >> vm->page_shift;
-               else
-                       pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
-       } else {
-               /*
-                * Entry already present.  Assert that the caller doesn't want
-                * a hugepage at this level, and that there isn't a hugepage at
-                * this level.
-                */
-               TEST_ASSERT(current_level != target_level,
-                           "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
-                           current_level, nested_paddr);
-               TEST_ASSERT(!pte->page_size,
-                           "Cannot create page table at level: %u, nested_paddr: 0x%lx",
-                           current_level, nested_paddr);
-       }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                    uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
-       const uint64_t page_size = PG_LEVEL_SIZE(target_level);
-       struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
-       uint16_t index;
-
-       TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-                   "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-       TEST_ASSERT((nested_paddr >> 48) == 0,
-                   "Nested physical address 0x%lx requires 5-level paging",
-                   nested_paddr);
-       TEST_ASSERT((nested_paddr % page_size) == 0,
-                   "Nested physical address not on page boundary,\n"
-                   "  nested_paddr: 0x%lx page_size: 0x%lx",
-                   nested_paddr, page_size);
-       TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Nested physical address beyond maximum supported,\n"
-                   "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   nested_paddr, vm->max_gfn, vm->page_size);
-       TEST_ASSERT((paddr % page_size) == 0,
-                   "Physical address not on page boundary,\n"
-                   "  paddr: 0x%lx page_size: 0x%lx",
-                   paddr, page_size);
-       TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-                   "Physical address beyond maximum supported,\n"
-                   "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-                   paddr, vm->max_gfn, vm->page_size);
-
-       for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
-               index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-               pte = &pt[index];
-
-               nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
-               if (pte->page_size)
-                       break;
-
-               pt = addr_gpa2hva(vm, pte->address * vm->page_size);
-       }
-
-       /*
-        * For now mark these as accessed and dirty because the only
-        * testcase we have needs that.  Can be reconsidered later.
-        */
-       pte->accessed = true;
-       pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                  uint64_t nested_paddr, uint64_t paddr)
-{
-       __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- *   vm - Virtual Machine
- *   nested_paddr - Nested guest physical address to map
- *   paddr - VM Physical Address
- *   size - The size of the range to map
- *   level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
-                 int level)
-{
-       size_t page_size = PG_LEVEL_SIZE(level);
-       size_t npages = size / page_size;
-
-       TEST_ASSERT(nested_paddr + size > nested_paddr, "Nested paddr overflow");
-       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
-       while (npages--) {
-               __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
-               nested_paddr += page_size;
-               paddr += page_size;
-       }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-               uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
-       __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-                       uint32_t memslot)
-{
-       sparsebit_idx_t i, last;
-       struct userspace_mem_region *region =
-               memslot2region(vm, memslot);
-
-       i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
-       last = i + (region->region.memory_size >> vm->page_shift);
-       for (;;) {
-               i = sparsebit_next_clear(region->unused_phy_pages, i);
-               if (i > last)
-                       break;
-
-               nested_map(vmx, vm,
-                          (uint64_t)i << vm->page_shift,
-                          (uint64_t)i << vm->page_shift,
-                          1 << vm->page_shift);
-       }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
-                           uint64_t addr, uint64_t size)
-{
-       __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
-bool kvm_cpu_has_ept(void)
-{
-       uint64_t ctrl;
-
-       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
-       if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
-               return false;
-
-       ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
-       return ctrl & SECONDARY_EXEC_ENABLE_EPT;
-}
-
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
-                 uint32_t eptp_memslot)
-{
-       TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
-       vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
-       vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
-       vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
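
A rough host-side sketch of wiring these helpers into a nested test, assuming vm and vcpu have already been created; the memslot argument is illustrative:

vm_vaddr_t vmx_gva;
struct vmx_pages *vmx = vcpu_alloc_vmx(vm, &vmx_gva);

if (kvm_cpu_has_ept()) {
        prepare_eptp(vmx, vm, 0);
        /* Identity map all of memslot 0 into the EPT tables. */
        nested_map_memslot(vmx, vm, 0);
}
vcpu_args_set(vcpu, 1, vmx_gva);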
-
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
-{
-       vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
-       vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
-       vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
-}
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
new file mode 100644 (file)
index 0000000..e32dd59
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ *  Nico Boehr <nrb@linux.ibm.com>
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN PAGE_SIZE
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so that use_cmma is enabled and the CMMA-related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+       asm volatile(
+               /* load TEST_DATA_START_GFN into r1 */
+               "       llilf 1,%[start_gfn]\n"
+               /* calculate the address from the gfn */
+               "       sllg 1,1,12(0)\n"
+               /* set the first page in TEST_DATA memslot to STABLE */
+               "       .insn rrf,0xb9ab0000,2,1,1,0\n"
+               /* hypercall */
+               "       diag 0,0,0x501\n"
+               "0:     j 0b"
+               :
+               : [start_gfn] "L"(TEST_DATA_START_GFN)
+               : "r1", "r2", "memory", "cc"
+       );
+}
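
Rendered as pseudo-C, the asm block above does roughly the following; essa_set_stable() and diag_501() are hypothetical intrinsics used only for illustration:

unsigned long addr = (unsigned long)TEST_DATA_START_GFN << 12;

essa_set_stable(addr);  /* ESSA with the SET_STABLE operation on one page */
diag_501();             /* DIAG 0x501: hypercall back to the host */
for (;;)
        ;               /* never returns */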
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+       asm volatile(
+               /* r1 = TEST_DATA_START_GFN */
+               "       xgr 1,1\n"
+               "       llilf 1,%[start_gfn]\n"
+               /* r5 = TEST_DATA_PAGE_COUNT */
+               "       lghi 5,%[page_count]\n"
+               /* r5 += r1 */
+               "2:     agfr 5,1\n"
+               /* r2 = r1 << PAGE_SHIFT */
+               "1:     sllg 2,1,12(0)\n"
+               /* essa(r4, r2, SET_STABLE) */
+               "       .insn rrf,0xb9ab0000,4,2,1,0\n"
+               /* i++ */
+               "       agfi 1,1\n"
+               /* if r1 < r5 goto 1 */
+               "       cgrjl 1,5,1b\n"
+               /* hypercall */
+               "       diag 0,0,0x501\n"
+               "0:     j 0b"
+               :
+               : [start_gfn] "L"(TEST_DATA_START_GFN),
+                 [page_count] "L"(TEST_DATA_PAGE_COUNT)
+               :
+                       /* the counter in our loop over the pages */
+                       "r1",
+                       /* the calculated page physical address */
+                       "r2",
+                       /* ESSA output register */
+                       "r4",
+                       /* last page */
+                       "r5",
+                       "cc", "memory"
+       );
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+       int i;
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+       /* set the array of memslots to zero like __vm_create does */
+       for (i = 0; i < NR_MEM_REGIONS; i++)
+               vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+       vm_userspace_mem_region_add(vm,
+                                   VM_MEM_SRC_ANONYMOUS,
+                                   TEST_DATA_START_GFN << vm->page_shift,
+                                   TEST_DATA_MEMSLOT,
+                                   TEST_DATA_PAGE_COUNT,
+                                   0
+                                  );
+       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+       /*
+        * Our VM has the following memory layout:
+        * +------+---------------------------+
+        * | GFN  | Memslot                   |
+        * +------+---------------------------+
+        * | 0    |                           |
+        * | ...  | MAIN (Code, Stack, ...)   |
+        * | 511  |                           |
+        * +------+---------------------------+
+        * | 4096 |                           |
+        * | ...  | TEST_DATA                 |
+        * | 4607 |                           |
+        * +------+---------------------------+
+        */
+       create_main_memslot(vm);
+       create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+       struct userspace_mem_region *slot0;
+
+       kvm_vm_elf_load(vm, program_invocation_name);
+
+       slot0 = memslot2region(vm, 0);
+       ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+       kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_barebones();
+
+       create_memslots(vm);
+
+       finish_vm_setup(vm);
+
+       return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+       int r;
+
+       r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+       TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+       vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+       vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+       return __kvm_device_attr_set(vm->fd,
+                                    KVM_S390_VM_MIGRATION,
+                                    KVM_S390_VM_MIGRATION_START,
+                                    NULL
+                                   );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+       int r = __enable_migration_mode(vm);
+
+       TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+       u64 out;
+       int r;
+
+       r = __kvm_device_attr_get(vm->fd,
+                                 KVM_S390_VM_MIGRATION,
+                                 KVM_S390_VM_MIGRATION_STATUS,
+                                 &out
+                                );
+       TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+       return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+       struct kvm_s390_cmma_log args;
+       int rc;
+
+       errno = 0;
+
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = flags,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+       *errno_out = errno;
+       return rc;
+}
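
For illustration only, a rough sketch of how a consumer (e.g. a migration loop) might drain CMMA values with the raw ioctl, reusing the buffer and struct names from this file; the loop structure is an assumption, not part of the test:

struct kvm_s390_cmma_log log = {
        .start_gfn = 0,
        .flags = 0,
        .values = (__u64)&cmma_value_buf[0],
};

do {
        log.count = sizeof(cmma_value_buf);
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &log);
        /* On return, .start_gfn and .count describe the batch just read. */
        log.start_gfn += log.count;
} while (log.remaining);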
+
+static void test_get_cmma_basic(void)
+{
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_vcpu *vcpu;
+       int rc, errno_out;
+
+       /* GET_CMMA_BITS without CMMA enabled should fail */
+       rc = vm_get_cmma_bits(vm, 0, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, ENXIO);
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+       vcpu_run(vcpu);
+
+       /* GET_CMMA_BITS without migration mode and without peeking should fail */
+       rc = vm_get_cmma_bits(vm, 0, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
+
+       /* GET_CMMA_BITS without migration mode and with peeking should work */
+       rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(errno_out, 0);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       /* GET_CMMA_BITS with invalid flags */
+       rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
+
+       kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       struct kvm_vcpu *vcpu;
+       u64 orig_psw;
+       int rc;
+
+       /* enabling migration mode on a VM without memory should fail */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+       errno = 0;
+
+       create_memslots(vm);
+       finish_vm_setup(vm);
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+       orig_psw = vcpu->run->psw_addr;
+
+       /*
+        * Execute one essa instruction in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /* migration mode when memslots have dirty tracking off should fail */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+       errno = 0;
+
+       /* enable dirty tracking */
+       enable_dirty_tracking(vm);
+
+       /* enabling migration mode should work now */
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       errno = 0;
+
+       /* execute another ESSA instruction to see this goes fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /*
+        * With migration mode on, create a new memslot with dirty tracking off.
+        * This should turn off migration mode.
+        */
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       vm_userspace_mem_region_add(vm,
+                                   VM_MEM_SRC_ANONYMOUS,
+                                   TEST_DATA_TWO_START_GFN << vm->page_shift,
+                                   TEST_DATA_TWO_MEMSLOT,
+                                   TEST_DATA_TWO_PAGE_COUNT,
+                                   0
+                                  );
+       TEST_ASSERT(!is_migration_mode_on(vm),
+                   "creating memslot without dirty tracking turns off migration mode"
+                  );
+
+       /* ESSA instructions should still execute fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       /*
+        * Turn on dirty tracking on the new memslot.
+        * It should be possible to turn migration mode back on again.
+        */
+       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+       rc = __enable_migration_mode(vm);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       errno = 0;
+
+       /*
+        * Turn off dirty tracking again, this time with just a flag change.
+        * Again, migration mode should turn off.
+        */
+       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+       TEST_ASSERT(!is_migration_mode_on(vm),
+                   "disabling dirty tracking should turn off migration mode"
+                  );
+
+       /* ESSA instructions should still execute fine */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslots, assert that all pages in
+ * both memslots have dirty CMMA attributes and that nothing else is dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+       struct kvm_s390_cmma_log args;
+
+       /*
+        * First iteration - everything should be dirty.
+        * Start at the main memslot...
+        */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+
+       /* ...and then - after a hole - the TEST_DATA memslot should follow */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = MAIN_PAGE_COUNT,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+       TEST_ASSERT_EQ(args.remaining, 0);
+
+       /* ...and nothing else should be there */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       TEST_ASSERT_EQ(args.count, 0);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+       struct kvm_s390_cmma_log args;
+
+       /* If we start from GFN 0 again, nothing should be dirty. */
+       args = (struct kvm_s390_cmma_log){
+               .start_gfn = 0,
+               .count = sizeof(cmma_value_buf),
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+       if (args.count || args.remaining || args.start_gfn)
+               TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+                         args.start_gfn,
+                         args.count,
+                         args.remaining
+                        );
+}
+
+static void test_get_initial_dirty(void)
+{
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_vcpu *vcpu;
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+       /*
+        * Execute one essa instruction in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       assert_all_slots_cmma_dirty(vm);
+
+       /* Start from the beginning again and make sure nothing else is dirty */
+       assert_no_pages_cmma_dirty(vm);
+
+       kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+                            u64 start_gfn, u64 gfn_count,
+                            struct kvm_s390_cmma_log *res_out)
+{
+       *res_out = (struct kvm_s390_cmma_log){
+               .start_gfn = start_gfn,
+               .count = gfn_count,
+               .flags = 0,
+               .values = (__u64)&cmma_value_buf[0]
+       };
+       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert that the given cmma_log struct, as filled in by query_cmma_range(),
+ * indicates that the first dirty gfn is first_dirty_gfn and that exactly
+ * dirty_gfn_count CMMA values were returned.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+                             u64 dirty_gfn_count,
+                             const struct kvm_s390_cmma_log *res)
+{
+       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
+       for (size_t i = 0; i < dirty_gfn_count; i++)
+               TEST_ASSERT_EQ(cmma_value_buf[i], 0x0); /* stable state */
+       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+       size_t gfn_offset;
+       struct kvm_vm *vm = create_vm_two_memslots();
+       struct kvm_s390_cmma_log log;
+       struct kvm_vcpu *vcpu;
+       u64 orig_psw;
+
+       enable_cmma(vm);
+       vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+       orig_psw = vcpu->run->psw_addr;
+
+       /*
+        * Execute some essa instructions in the guest. Otherwise the guest will
+        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+        */
+       vcpu_run(vcpu);
+       assert_exit_was_hypercall(vcpu);
+
+       enable_dirty_tracking(vm);
+       enable_migration_mode(vm);
+
+       /* un-dirty all pages */
+       assert_all_slots_cmma_dirty(vm);
+
+       /* Then, dirty just the TEST_DATA memslot */
+       vcpu->run->psw_addr = orig_psw;
+       vcpu_run(vcpu);
+
+       gfn_offset = TEST_DATA_START_GFN;
+       /**
+        * Query CMMA attributes of one page, starting at page 0. Since the
+        * main memslot was not touched by the VM, this should yield the first
+        * page of the TEST_DATA memslot.
+        * The dirty bitmap should now look like this:
+        * 0: not dirty
+        * [0x1, 0x200): dirty
+        */
+       query_cmma_range(vm, 0, 1, &log);
+       assert_cmma_dirty(gfn_offset, 1, &log);
+       gfn_offset++;
+
+       /**
+        * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+        * memslot. This should wrap back to the beginning of the TEST_DATA
+        * memslot, page 1.
+        * The dirty bitmap should now look like this:
+        * [0, 0x21): not dirty
+        * [0x21, 0x200): dirty
+        */
+       query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+       assert_cmma_dirty(gfn_offset, 0x20, &log);
+       gfn_offset += 0x20;
+
+       /* Skip 32 pages */
+       gfn_offset += 0x20;
+
+       /**
+        * After skipping 32 pages, query the next 32 (0x20) pages.
+        * The dirty bitmap should now look like this:
+        * [0, 0x21): not dirty
+        * [0x21, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       query_cmma_range(vm, gfn_offset, 0x20, &log);
+       assert_cmma_dirty(gfn_offset, 0x20, &log);
+       gfn_offset += 0x20;
+
+       /**
+        * Query 1 page from the beginning of the TEST_DATA memslot. This should
+        * yield page 0x21.
+        * The dirty bitmap should now look like this:
+        * [0, 0x22): not dirty
+        * [0x22, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+       assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+       gfn_offset++;
+
+       /**
+        * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+        * This should yield pages [0x23, 0x33).
+        * The dirty bitmap should now look like this:
+        * [0, 0x22): not dirty
+        * 0x22: dirty
+        * [0x23, 0x33): not dirty
+        * [0x33, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x23;
+       query_cmma_range(vm, gfn_offset, 15, &log);
+       assert_cmma_dirty(gfn_offset, 15, &log);
+
+       /**
+        * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+        * This should yield page [0x22, 0x33)
+        * The dirty bitmap should now look like this:
+        * [0, 0x33): not dirty
+        * [0x33, 0x41): dirty
+        * [0x41, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x22;
+       query_cmma_range(vm, gfn_offset, 17, &log);
+       assert_cmma_dirty(gfn_offset, 17, &log);
+
+       /**
+        * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+        * This should yield page 0x40 and nothing more, since there are more
+        * than 16 non-dirty pages after page 0x40.
+        * The dirty bitmap should now look like this:
+        * [0, 0x33): not dirty
+        * [0x33, 0x40): dirty
+        * [0x40, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x40;
+       query_cmma_range(vm, gfn_offset, 25, &log);
+       assert_cmma_dirty(gfn_offset, 1, &log);
+
+       /**
+        * Query pages [0x33, 0x40).
+        * The dirty bitmap should now look like this:
+        * [0, 0x61): not dirty
+        * [0x61, 0x200): dirty
+        */
+       gfn_offset = TEST_DATA_START_GFN + 0x33;
+       query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+       assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+       /**
+        * Query the remaining pages [0x61, 0x200).
+        */
+       gfn_offset = TEST_DATA_START_GFN;
+       query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+       assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+       assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "migration mode and dirty tracking", test_migration_mode },
+       { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+       { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+       { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+       struct kvm_vm *vm = vm_create_barebones();
+       int r;
+
+       r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+       kvm_vm_free(vm);
+
+       return r;
+}
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+       TEST_REQUIRE(machine_has_cmma());
+
+       ksft_print_header();
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config
new file mode 100644 (file)
index 0000000..23270f2
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_KVM=y
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
new file mode 100644 (file)
index 0000000..2725588
--- /dev/null
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Hariharan Mari <hari55@linux.ibm.com>
+ *
+ * The tests compare the CPU subfunction data obtained via the KVM ioctl with the data
+ * returned by an ASM block executing the same CPU subfunction query. Currently KVM doesn't
+ * mask instruction query data reported via the CPU Model, allowing us to directly compare
+ * it with the data acquired by executing the queries in the test.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include "facility.h"
+
+#include "kvm_util.h"
+
+#define PLO_FUNCTION_MAX 256
+
+/* Query available CPU subfunctions */
+struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
+
+static void get_cpu_machine_subfunctions(struct kvm_vm *vm,
+                                       struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
+{
+       int r;
+
+       r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
+                                 KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
+
+       TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+       unsigned long function = nr | 0x100;
+       int cc;
+
+       asm volatile("  lgr     0,%[function]\n"
+                       /* Parameter registers are ignored for "test bit" */
+                       "       plo     0,0,0,0(0)\n"
+                       "       ipm     %0\n"
+                       "       srl     %0,28\n"
+                       : "=d" (cc)
+                       : [function] "d" (function)
+                       : "cc", "0");
+       return cc == 0;
+}
+
+/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
+static void test_plo_asm_block(u8 (*query)[32])
+{
+       for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
+               if (plo_test_bit(i))
+                       (*query)[i >> 3] |= 0x80 >> (i & 7);
+       }
+}
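
The packing above follows the MSB-first bit numbering used by s390 facility and query bit strings: bit 0 is the most significant bit of byte 0. A hypothetical helper to read such a bit string back (not used by the test) would be:

static inline bool msb0_test_bit(const u8 *bits, unsigned int nr)
{
        return bits[nr >> 3] & (0x80 >> (nr & 7));
}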
+
+/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
+static void test_kmac_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb91e0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
+static void test_kmc_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92f0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
+static void test_km_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92e0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
+static void test_kimd_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93e0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
+static void test_klmd_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93f0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
+static void test_kmctr_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rrf,0xb92d0000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
+static void test_kmf_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92a0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
+static void test_kmo_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92b0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
+static void test_pcc_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb92c0000,0,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
+static void test_prno_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93c0000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
+static void test_kma_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rrf,0xb9290000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
+static void test_kdsa_asm_block(u8 (*query)[16])
+{
+       asm volatile("  la      %%r1,%[query]\n"
+                       "       xgr     %%r0,%%r0\n"
+                       "       .insn   rre,0xb93a0000,0,2\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "r0", "r1");
+}
+
+/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
+static void test_sortl_asm_block(u8 (*query)[32])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       la      1,%[query]\n"
+                       "       .insn   rre,0xb9380000,2,4\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "0", "1");
+}
+
+/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
+static void test_dfltcc_asm_block(u8 (*query)[32])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       la      1,%[query]\n"
+                       "       .insn   rrf,0xb9390000,2,4,6,0\n"
+                       : [query] "=R" (*query)
+                       :
+                       : "cc", "0", "1");
+}
+
+/*
+ * Testing Perform Function with Concurrent Results (PFCR)
+ * CPU subfunctions's ASM block
+ */
+static void test_pfcr_asm_block(u8 (*query)[16])
+{
+       asm volatile("  lghi    0,0\n"
+                       "       .insn   rsy,0xeb0000000016,0,0,%[query]\n"
+                       : [query] "=QS" (*query)
+                       :
+                       : "cc", "0");
+}
+
+typedef void (*testfunc_t)(u8 (*array)[]);
+
+struct testdef {
+       const char *subfunc_name;
+       u8 *subfunc_array;
+       size_t array_size;
+       testfunc_t test;
+       int facility_bit;
+} testlist[] = {
+       /*
+        * PLO was introduced in the very first 64-bit machine generation.
+        * Hence it is assumed PLO is always installed in Z Arch.
+        */
+       { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
+       /* MSA - Facility bit 17 */
+       { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
+       { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
+       { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
+       { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
+       { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
+       /* MSA - Facility bit 77 */
+       { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
+       { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
+       { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
+       { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
+       /* MSA5 - Facility bit 57 */
+       { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
+       /* MSA8 - Facility bit 146 */
+       { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
+       /* MSA9 - Facility bit 155 */
+       { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
+       /* SORTL - Facility bit 150 */
+       { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
+       /* DFLTCC - Facility bit 151 */
+       { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
+       /* Concurrent-function facility - Facility bit 201 */
+       { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
+};
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       int idx;
+
+       ksft_print_header();
+
+       vm = vm_create(1);
+
+       memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
+       get_cpu_machine_subfunctions(vm, &cpu_subfunc);
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (test_facility(testlist[idx].facility_bit)) {
+                       u8 *array = malloc(testlist[idx].array_size);
+
+                       testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
+
+                       TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
+                                             array, testlist[idx].array_size), 0);
+
+                       ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
+                       free(array);
+               } else {
+                       ksft_test_result_skip("%s feature is not available\n",
+                                             testlist[idx].subfunc_name);
+               }
+       }
+
+       kvm_vm_free(vm);
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
new file mode 100644 (file)
index 0000000..ad80959
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+    "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+                                     size_t new_psw_off, uint64_t *new_psw)
+{
+       struct kvm_guest_debug debug = {};
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       char *lowcore;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       lowcore = addr_gpa2hva(vm, 0);
+       new_psw[0] = (*vcpu)->run->psw_mask;
+       new_psw[1] = (uint64_t)int_handler;
+       memcpy(lowcore + new_psw_off, new_psw, 16);
+       vcpu_regs_get(*vcpu, &regs);
+       regs.gprs[2] = -1;
+       vcpu_regs_set(*vcpu, &regs);
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+       vcpu_guest_debug_set(*vcpu, &debug);
+       vcpu_run(*vcpu);
+
+       return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+    "j .\n");
+
+static void test_step_pgm(void)
+{
+       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+    "diag %r0,%r0,0\n"
+    "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+       struct kvm_s390_irq irq = {
+               .type = KVM_S390_PROGRAM_INT,
+               .u.pgm.code = PGM_SPECIFICATION,
+       };
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+                            __LC_PGM_NEW_PSW, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+    "iske %r2,%r2\n"
+    "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+    "lctl %c0,%c0,1\n"
+    "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+    "svc 0\n"
+    "j .\n");
+
+static void test_step_svc(void)
+{
+       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "single-step pgm", test_step_pgm },
+       { "single-step pgm caused by diag", test_step_pgm_diag },
+       { "single-step pgm caused by iske", test_step_pgm_iske },
+       { "single-step pgm caused by lctl", test_step_pgm_lctl },
+       { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c
new file mode 100644 (file)
index 0000000..4374b4c
--- /dev/null
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x KVM_S390_MEM_OP
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+enum mop_target {
+       LOGICAL,
+       SIDA,
+       ABSOLUTE,
+       INVALID,
+};
+
+enum mop_access_mode {
+       READ,
+       WRITE,
+       CMPXCHG,
+};
+
+struct mop_desc {
+       uintptr_t gaddr;
+       uintptr_t gaddr_v;
+       uint64_t set_flags;
+       unsigned int f_check : 1;
+       unsigned int f_inject : 1;
+       unsigned int f_key : 1;
+       unsigned int _gaddr_v : 1;
+       unsigned int _set_flags : 1;
+       unsigned int _sida_offset : 1;
+       unsigned int _ar : 1;
+       uint32_t size;
+       enum mop_target target;
+       enum mop_access_mode mode;
+       void *buf;
+       uint32_t sida_offset;
+       void *old;
+       uint8_t old_value[16];
+       bool *cmpxchg_success;
+       uint8_t ar;
+       uint8_t key;
+};
+
+const uint8_t NO_KEY = 0xff;
+
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
+{
+       struct kvm_s390_mem_op ksmo = {
+               .gaddr = (uintptr_t)desc->gaddr,
+               .size = desc->size,
+               .buf = ((uintptr_t)desc->buf),
+               .reserved = "ignored_ignored_ignored_ignored"
+       };
+
+       switch (desc->target) {
+       case LOGICAL:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+               break;
+       case SIDA:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+               break;
+       case ABSOLUTE:
+               if (desc->mode == READ)
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+               if (desc->mode == WRITE)
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+               if (desc->mode == CMPXCHG) {
+                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
+                       ksmo.old_addr = (uint64_t)desc->old;
+                       memcpy(desc->old_value, desc->old, desc->size);
+               }
+               break;
+       case INVALID:
+               ksmo.op = -1;
+       }
+       if (desc->f_check)
+               ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+       if (desc->f_inject)
+               ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+       if (desc->_set_flags)
+               ksmo.flags = desc->set_flags;
+       if (desc->f_key && desc->key != NO_KEY) {
+               ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+               ksmo.key = desc->key;
+       }
+       if (desc->_ar)
+               ksmo.ar = desc->ar;
+       else
+               ksmo.ar = 0;
+       if (desc->_sida_offset)
+               ksmo.sida_offset = desc->sida_offset;
+
+       return ksmo;
+}
+
+struct test_info {
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
+{
+       if (!PRINT_MEMOP)
+               return;
+
+       if (!vcpu)
+               printf("vm memop(");
+       else
+               printf("vcpu memop(");
+       switch (ksmo->op) {
+       case KVM_S390_MEMOP_LOGICAL_READ:
+               printf("LOGICAL, READ, ");
+               break;
+       case KVM_S390_MEMOP_LOGICAL_WRITE:
+               printf("LOGICAL, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_SIDA_READ:
+               printf("SIDA, READ, ");
+               break;
+       case KVM_S390_MEMOP_SIDA_WRITE:
+               printf("SIDA, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_READ:
+               printf("ABSOLUTE, READ, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+               printf("ABSOLUTE, WRITE, ");
+               break;
+       case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+               printf("ABSOLUTE, CMPXCHG, ");
+               break;
+       }
+       printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+              ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
+              ksmo->old_addr);
+       if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+               printf(", CHECK_ONLY");
+       if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+               printf(", INJECT_EXCEPTION");
+       if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+               printf(", SKEY_PROTECTION");
+       puts(")");
+}
+
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+                          struct mop_desc *desc)
+{
+       struct kvm_vcpu *vcpu = info.vcpu;
+
+       if (!vcpu)
+               return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
+       else
+               return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
+}
+
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+                       struct mop_desc *desc)
+{
+       int r;
+
+       r = err_memop_ioctl(info, ksmo, desc);
+       if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
+               if (desc->cmpxchg_success) {
+                       int diff = memcmp(desc->old_value, desc->old, desc->size);
+                       *desc->cmpxchg_success = !diff;
+               }
+       }
+       TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
+}
+
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...)    \
+({                                                                             \
+       struct test_info __info = (info_p);                                     \
+       struct mop_desc __desc = {                                              \
+               .target = (mop_target_p),                                       \
+               .mode = (access_mode_p),                                        \
+               .buf = (buf_p),                                                 \
+               .size = (size_p),                                               \
+               __VA_ARGS__                                                     \
+       };                                                                      \
+       struct kvm_s390_mem_op __ksmo;                                          \
+                                                                               \
+       if (__desc._gaddr_v) {                                                  \
+               if (__desc.target == ABSOLUTE)                                  \
+                       __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
+               else                                                            \
+                       __desc.gaddr = __desc.gaddr_v;                          \
+       }                                                                       \
+       __ksmo = ksmo_from_desc(&__desc);                                       \
+       print_memop(__info.vcpu, &__ksmo);                                      \
+       err##memop_ioctl(__info, &__ksmo, &__desc);                             \
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
+#define CMPXCHG_OLD(o) .old = (o)
+#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
+
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
+
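+/*
+ * Typical usage: MOP(t.vcpu, LOGICAL, WRITE, mem1, size, GADDR_V(mem1), KEY(key))
+ * issues a key-checked logical write via the vcpu ioctl and asserts success,
+ * while ERR_MOP() returns the raw ioctl result instead. CHECK_N_DO(MOP, ...)
+ * first performs the operation with CHECK_ONLY set and then for real.
+ */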
+#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
+
+static uint8_t __aligned(PAGE_SIZE) mem1[65536];
+static uint8_t __aligned(PAGE_SIZE) mem2[65536];
+
+struct test_default {
+       struct kvm_vm *kvm_vm;
+       struct test_info vm;
+       struct test_info vcpu;
+       struct kvm_run *run;
+       int size;
+};
+
+static struct test_default test_default_init(void *guest_code)
+{
+       struct kvm_vcpu *vcpu;
+       struct test_default t;
+
+       t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+       t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       t.vm = (struct test_info) { t.kvm_vm, NULL };
+       t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+       t.run = vcpu->run;
+       return t;
+}
+
+enum stage {
+       /* Synced state set by host, e.g. DAT */
+       STAGE_INITED,
+       /* Guest did nothing */
+       STAGE_IDLED,
+       /* Guest set storage keys (specifics up to test case) */
+       STAGE_SKEYS_SET,
+       /* Guest copied memory (locations up to test case) */
+       STAGE_COPIED,
+       /* End of guest code reached */
+       STAGE_DONE,
+};
+
+#define HOST_SYNC(info_p, stage)                                       \
+({                                                                     \
+       struct test_info __info = (info_p);                             \
+       struct kvm_vcpu *__vcpu = __info.vcpu;                          \
+       struct ucall uc;                                                \
+       int __stage = (stage);                                          \
+                                                                       \
+       vcpu_run(__vcpu);                                               \
+       get_ucall(__vcpu, &uc);                                         \
+       if (uc.cmd == UCALL_ABORT) {                                    \
+               REPORT_GUEST_ASSERT(uc);                                \
+       }                                                               \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
+})                                                                     \
+
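+/* Fill mem1 with random bytes and mem2 with a fixed pattern so copies are detectable. */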
+static void prepare_mem12(void)
+{
+       int i;
+
+       for (i = 0; i < sizeof(mem1); i++)
+               mem1[i] = rand();
+       memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+       TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
+                              enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+       prepare_mem12();
+       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
+                  GADDR_V(mem1), KEY(key));
+       HOST_SYNC(copy_cpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+                  GADDR_V(mem2), KEY(key));
+       ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
+                        enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+       prepare_mem12();
+       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
+       HOST_SYNC(copy_cpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+                  GADDR_V(mem2), KEY(key));
+       ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_cmpxchg(struct test_default *test, uint8_t key)
+{
+       for (int size = 1; size <= 16; size *= 2) {
+               for (int offset = 0; offset < 16; offset += size) {
+                       uint8_t __aligned(16) new[16] = {};
+                       uint8_t __aligned(16) old[16];
+                       bool succ;
+
+                       prepare_mem12();
+                       default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
+
+                       memcpy(&old, mem1, 16);
+                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+                           size, GADDR_V(mem1 + offset),
+                           CMPXCHG_OLD(old + offset),
+                           CMPXCHG_SUCCESS(&succ), KEY(key));
+                       HOST_SYNC(test->vcpu, STAGE_COPIED);
+                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+                       TEST_ASSERT(succ, "exchange of values should succeed");
+                       memcpy(mem1 + offset, new + offset, size);
+                       ASSERT_MEM_EQ(mem1, mem2, 16);
+
+                       memcpy(&old, mem1, 16);
+                       new[offset]++;
+                       old[offset]++;
+                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+                           size, GADDR_V(mem1 + offset),
+                           CMPXCHG_OLD(old + offset),
+                           CMPXCHG_SUCCESS(&succ), KEY(key));
+                       HOST_SYNC(test->vcpu, STAGE_COPIED);
+                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+                       TEST_ASSERT(!succ, "exchange of values should not succeed");
+                       ASSERT_MEM_EQ(mem1, mem2, 16);
+                       ASSERT_MEM_EQ(&old, mem1, 16);
+               }
+       }
+}
+
+static void guest_copy(void)
+{
+       GUEST_SYNC(STAGE_INITED);
+       memcpy(&mem2, &mem1, sizeof(mem2));
+       GUEST_SYNC(STAGE_COPIED);
+}
+
+static void test_copy(void)
+{
+       struct test_default t = test_default_init(guest_copy);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_access_register(void)
+{
+       struct test_default t = test_default_init(guest_copy);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       prepare_mem12();
+       t.run->psw_mask &= ~(3UL << (63 - 17));
+       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+
+       /*
+        * Primary address space gets used if an access register
+        * contains zero. The host makes use of AR[1], so it is a good
+        * candidate for ensuring the guest AR (of zero) is used.
+        */
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+                  GADDR_V(mem1), AR(1));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+                  GADDR_V(mem2), AR(1));
+       ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
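+/*
+ * Set the storage key for every page in [addr, addr + len): LRA translates
+ * the virtual address and SSKE sets the key on the resulting frame; the
+ * guest asserts that each page is actually mapped.
+ */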
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+       uintptr_t _addr, abs, i;
+       int not_mapped = 0;
+
+       _addr = (uintptr_t)addr;
+       for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+               abs = i;
+               asm volatile (
+                              "lra     %[abs], 0(0,%[abs])\n"
+                       "       jz      0f\n"
+                       "       llill   %[not_mapped],1\n"
+                       "       j       1f\n"
+                       "0:     sske    %[key], %[abs]\n"
+                       "1:"
+                       : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+                       : [key] "r" (key)
+                       : "cc"
+               );
+               GUEST_ASSERT_EQ(not_mapped, 0);
+       }
+}
+
+static void guest_copy_key(void)
+{
+       set_storage_key_range(mem1, sizeof(mem1), 0x90);
+       set_storage_key_range(mem2, sizeof(mem2), 0x90);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               memcpy(&mem2, &mem1, sizeof(mem2));
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key);
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm, no key */
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
+
+       /* vm/vcpu, matching key or key 0 */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+       /*
+        * There used to be different code paths for key handling depending
+        * on whether the region crossed a page boundary.
+        * There currently are not, but the more tests the merrier.
+        */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
+       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
+
+       /* vm/vcpu, mismatching keys on read, but no fetch protection */
+       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_cmpxchg_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key);
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       default_cmpxchg(&t, NO_KEY);
+       default_cmpxchg(&t, 0);
+       default_cmpxchg(&t, 9);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static __uint128_t cut_to_size(int size, __uint128_t val)
+{
+       switch (size) {
+       case 1:
+               return (uint8_t)val;
+       case 2:
+               return (uint16_t)val;
+       case 4:
+               return (uint32_t)val;
+       case 8:
+               return (uint64_t)val;
+       case 16:
+               return val;
+       }
+       GUEST_FAIL("Invalid size = %u", size);
+       return 0;
+}
+
+static bool popcount_eq(__uint128_t a, __uint128_t b)
+{
+       unsigned int count_a, count_b;
+
+       count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
+                 __builtin_popcountl((uint64_t)a);
+       count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
+                 __builtin_popcountl((uint64_t)b);
+       return count_a == count_b;
+}
+
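+/*
+ * Rotate the low 'size' bytes of val right by 'amount' bits; callers
+ * truncate any spill into higher bits of the 128-bit result.
+ */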
+static __uint128_t rotate(int size, __uint128_t val, int amount)
+{
+       unsigned int bits = size * 8;
+
+       amount = (amount + bits) % bits;
+       val = cut_to_size(size, val);
+       if (!amount)
+               return val;
+       return (val << (bits - amount)) | (val >> amount);
+}
+
+const unsigned int max_block = 16;
+
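+/*
+ * Derive a pseudo-random, power-of-two sized and naturally aligned block
+ * within the first max_block bytes; guest and host use different
+ * multipliers so that their access patterns differ.
+ */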
+static void choose_block(bool guest, int i, int *size, int *offset)
+{
+       unsigned int rand;
+
+       rand = i;
+       if (guest) {
+               rand = rand * 19 + 11;
+               *size = 1 << ((rand % 3) + 2);
+               rand = rand * 19 + 11;
+               *offset = (rand % max_block) & ~(*size - 1);
+       } else {
+               rand = rand * 17 + 5;
+               *size = 1 << (rand % 5);
+               rand = rand * 17 + 5;
+               *offset = (rand % max_block) & ~(*size - 1);
+       }
+}
+
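+/*
+ * Mutate 'old' by either swapping two of its bytes or rotating it; both
+ * transformations preserve the number of set bits, which the host checks
+ * at the end of the concurrent cmpxchg test via popcount_eq().
+ */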
+static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
+{
+       unsigned int rand;
+       int amount;
+       bool swap;
+
+       rand = i;
+       rand = rand * 3 + 1;
+       if (guest)
+               rand = rand * 3 + 1;
+       swap = rand % 2 == 0;
+       if (swap) {
+               int i, j;
+               __uint128_t new;
+               uint8_t byte0, byte1;
+
+               rand = rand * 3 + 1;
+               i = rand % size;
+               rand = rand * 3 + 1;
+               j = rand % size;
+               if (i == j)
+                       return old;
+               new = rotate(16, old, i * 8);
+               byte0 = new & 0xff;
+               new &= ~0xff;
+               new = rotate(16, new, -i * 8);
+               new = rotate(16, new, j * 8);
+               byte1 = new & 0xff;
+               new = (new & ~0xff) | byte0;
+               new = rotate(16, new, -j * 8);
+               new = rotate(16, new, i * 8);
+               new = new | byte1;
+               new = rotate(16, new, -i * 8);
+               return new;
+       }
+       rand = rand * 3 + 1;
+       amount = rand % (size * 8);
+       return rotate(size, old, amount);
+}
+
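+/*
+ * Guest-side compare-and-swap using CS/CSG/CDSG. Returns true on success;
+ * on failure, *old_addr is updated with the value found in memory.
+ */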
+static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+       bool ret;
+
+       switch (size) {
+       case 4: {
+                       uint32_t old = *old_addr;
+
+                       asm volatile ("cs %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(uint32_t *)(target))
+                           : [new] "d" ((uint32_t)new)
+                           : "cc"
+                       );
+                       ret = old == (uint32_t)*old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       case 8: {
+                       uint64_t old = *old_addr;
+
+                       asm volatile ("csg %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(uint64_t *)(target))
+                           : [new] "d" ((uint64_t)new)
+                           : "cc"
+                       );
+                       ret = old == (uint64_t)*old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       case 16: {
+                       __uint128_t old = *old_addr;
+
+                       asm volatile ("cdsg %[old],%[new],%[address]"
+                           : [old] "+d" (old),
+                             [address] "+Q" (*(__uint128_t *)(target))
+                           : [new] "d" (new)
+                           : "cc"
+                       );
+                       ret = old == *old_addr;
+                       *old_addr = old;
+                       return ret;
+               }
+       }
+       GUEST_FAIL("Invalid size = %u", size);
+       return 0;
+}
+
+const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
+
+static void guest_cmpxchg_key(void)
+{
+       int size, offset;
+       __uint128_t old, new;
+
+       set_storage_key_range(mem1, max_block, 0x10);
+       set_storage_key_range(mem2, max_block, 0x10);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (int i = 0; i < cmpxchg_iter_outer; i++) {
+               do {
+                       old = 1;
+               } while (!_cmpxchg(16, mem1, &old, 0));
+               for (int j = 0; j < cmpxchg_iter_inner; j++) {
+                       choose_block(true, i + j, &size, &offset);
+                       do {
+                               new = permutate_bits(true, i + j, size, old);
+                       } while (!_cmpxchg(size, mem2 + offset, &old, new));
+               }
+       }
+
+       GUEST_SYNC(STAGE_DONE);
+}
+
+static void *run_guest(void *data)
+{
+       struct test_info *info = data;
+
+       HOST_SYNC(*info, STAGE_DONE);
+       return NULL;
+}
+
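+/* Return a pointer to the 'size' low-order bytes of the (big-endian) 128-bit value. */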
+static char *quad_to_char(__uint128_t *quad, int size)
+{
+       return ((char *)quad) + (sizeof(*quad) - size);
+}
+
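+/*
+ * Stress cmpxchg from host and guest concurrently: the first quadword of
+ * guest mem1 acts as a handover flag (the host sets it to 1, the guest
+ * resets it to 0), while both sides keep applying popcount-preserving
+ * mutations to mem2 via compare-and-swap. A non-atomic implementation
+ * would eventually change the number of set bits, which the final
+ * assertion checks.
+ */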
+static void test_cmpxchg_key_concurrent(void)
+{
+       struct test_default t = test_default_init(guest_cmpxchg_key);
+       int size, offset;
+       __uint128_t old, new;
+       bool success;
+       pthread_t thread;
+
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
+       pthread_create(&thread, NULL, run_guest, &t.vcpu);
+
+       for (int i = 0; i < cmpxchg_iter_outer; i++) {
+               do {
+                       old = 0;
+                       new = 1;
+                       MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
+                           sizeof(new), GADDR_V(mem1),
+                           CMPXCHG_OLD(&old),
+                           CMPXCHG_SUCCESS(&success), KEY(1));
+               } while (!success);
+               for (int j = 0; j < cmpxchg_iter_inner; j++) {
+                       choose_block(false, i + j, &size, &offset);
+                       do {
+                               new = permutate_bits(false, i + j, size, old);
+                               MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
+                                   size, GADDR_V(mem2 + offset),
+                                   CMPXCHG_OLD(quad_to_char(&old, size)),
+                                   CMPXCHG_SUCCESS(&success), KEY(1));
+                       } while (!success);
+               }
+       }
+
+       pthread_join(thread, NULL);
+
+       MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
+       TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
+                   "Must retain number of set bits");
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+       /*
+        * For some reason combining the first sync with override enablement
+        * results in an exception when calling HOST_SYNC.
+        */
+       GUEST_SYNC(STAGE_INITED);
+       /* Storage protection override applies to both store and fetch. */
+       set_storage_key_range(mem1, sizeof(mem1), 0x98);
+       set_storage_key_range(mem2, sizeof(mem2), 0x98);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               memcpy(&mem2, &mem1, sizeof(mem2));
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys, storage protection override in effect */
+       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_key_fetch_prot(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm/vcpu, matching key, fetch protection in effect */
+       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
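+/*
+ * Key-protected accesses are expected to fail with a positive return value
+ * of 4, the program interruption code for a protection exception.
+ */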
+#define ERR_PROT_MOP(...)                                                      \
+({                                                                             \
+       int rv;                                                                 \
+                                                                               \
+       rv = ERR_MOP(__VA_ARGS__);                                              \
+       TEST_ASSERT(rv == 4, "Should result in protection exception");          \
+})
+
+static void guest_error_key(void)
+{
+       GUEST_SYNC(STAGE_INITED);
+       set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+       set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+       GUEST_SYNC(STAGE_SKEYS_SET);
+       GUEST_SYNC(STAGE_IDLED);
+}
+
+static void test_errors_key(void)
+{
+       struct test_default t = test_default_init(guest_error_key);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm/vcpu, mismatching keys, fetch protection in effect */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg_key(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+       int i;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       for (i = 1; i <= 16; i *= 2) {
+               __uint128_t old = 0;
+
+               ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
+                            CMPXCHG_OLD(&old), KEY(2));
+       }
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_termination(void)
+{
+       struct test_default t = test_default_init(guest_error_key);
+       uint64_t prefix;
+       uint64_t teid;
+       uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+       uint64_t psw[2];
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys after first page */
+       ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+       /*
+        * The memop injected a program exception and the test needs to check the
+        * Translation-Exception Identification (TEID). It is necessary to run
+        * the guest in order to be able to read the TEID from guest memory.
+        * Set the guest's program-new PSW, so the guest state is not clobbered.
+        */
+       prefix = t.run->s.regs.prefix;
+       psw[0] = t.run->psw_mask;
+       psw[1] = t.run->psw_addr;
+       MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+       HOST_SYNC(t.vcpu, STAGE_IDLED);
+       MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+       /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+       TEST_ASSERT_EQ(teid & teid_mask, 0);
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vm, mismatching keys, storage protection override not applicable to vm */
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+const uint64_t last_page_addr = -PAGE_SIZE;
+
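+/*
+ * Fetch protection override, when enabled via CR0, allows fetches from
+ * addresses 0..2047 to ignore fetch protection. The tests below therefore
+ * need guest pages mapped at address 0 and at the last page (for the
+ * wraparound case).
+ */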
+static void guest_copy_key_fetch_prot_override(void)
+{
+       int i;
+       char *page_0 = 0;
+
+       GUEST_SYNC(STAGE_INITED);
+       set_storage_key_range(0, PAGE_SIZE, 0x18);
+       set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+       asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
+       GUEST_SYNC(STAGE_SKEYS_SET);
+
+       for (;;) {
+               for (i = 0; i < PAGE_SIZE; i++)
+                       page_0[i] = mem1[i];
+               GUEST_SYNC(STAGE_COPIED);
+       }
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys on fetch, fetch protection override applies */
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+       ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+       /*
+        * vcpu, mismatching keys on fetch, fetch protection override applies,
+        * wraparound
+        */
+       prepare_mem12();
+       MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+       HOST_SYNC(t.vcpu, STAGE_COPIED);
+       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+                  GADDR_V(guest_last_page), KEY(2));
+       ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+       vm_vaddr_t guest_0_page, guest_last_page;
+
+       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+               print_skip("did not allocate guest pages at required positions");
+               goto out;
+       }
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+       /*
+        * vcpu, mismatching keys on fetch; fetch protection override does
+        * not apply because the access exceeds the 2048-byte override range
+        */
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+                  GADDR_V(guest_last_page), KEY(2));
+       /* vm, fetch protection override does not apply */
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+       GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+       for (;;)
+               GUEST_SYNC(STAGE_IDLED);
+}
+
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
+{
+       int rv;
+
+       /* Bad size: */
+       rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+       /* Zero size: */
+       rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+                   "ioctl allows 0 as size");
+
+       /* Bad flags: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+       /* Bad guest address: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
+       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
+
+       /* Bad host address: */
+       rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == EFAULT,
+                   "ioctl does not report bad host memory address");
+
+       /* Bad key: */
+       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+       struct test_default t = test_default_init(guest_idle);
+       int rv;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       _test_errors_common(t.vcpu, LOGICAL, t.size);
+       _test_errors_common(t.vm, ABSOLUTE, t.size);
+
+       /* Bad operation: */
+       rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+       /* virtual addresses are not translated when passing INVALID */
+       rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
+       /* Bad access register: */
+       t.run->psw_mask &= ~(3UL << (63 - 17));
+       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+       HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
+       rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
+       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
+       t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
+       HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
+
+       /* Check that the SIDA calls are rejected for non-protected guests */
+       rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL,
+                   "ioctl does not reject SIDA_READ in non-protected mode");
+       rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+       TEST_ASSERT(rv == -1 && errno == EINVAL,
+                   "ioctl does not reject SIDA_WRITE in non-protected mode");
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg(void)
+{
+       struct test_default t = test_default_init(guest_idle);
+       __uint128_t old;
+       int rv, i, power = 1;
+
+       HOST_SYNC(t.vcpu, STAGE_INITED);
+
+       for (i = 0; i < 32; i++) {
+               if (i == power) {
+                       power *= 2;
+                       continue;
+               }
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv == -1 && errno == EINVAL,
+                           "ioctl allows bad size for cmpxchg");
+       }
+       for (i = 1; i <= 16; i *= 2) {
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
+       }
+       for (i = 2; i <= 16; i *= 2) {
+               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
+                            CMPXCHG_OLD(&old));
+               TEST_ASSERT(rv == -1 && errno == EINVAL,
+                           "ioctl allows bad alignment for cmpxchg");
+       }
+
+       kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+       int extension_cap, idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
+       extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+
+       struct testdef {
+               const char *name;
+               void (*test)(void);
+               bool requirements_met;
+       } testlist[] = {
+               {
+                       .name = "simple copy",
+                       .test = test_copy,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "generic error checks",
+                       .test = test_errors,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "copy with storage keys",
+                       .test = test_copy_key,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "cmpxchg with storage keys",
+                       .test = test_cmpxchg_key,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "concurrently cmpxchg with storage keys",
+                       .test = test_cmpxchg_key_concurrent,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "copy with key storage protection override",
+                       .test = test_copy_key_storage_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with key fetch protection",
+                       .test = test_copy_key_fetch_prot,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with key fetch protection override",
+                       .test = test_copy_key_fetch_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "copy with access register mode",
+                       .test = test_copy_access_register,
+                       .requirements_met = true,
+               },
+               {
+                       .name = "error checks with key",
+                       .test = test_errors_key,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks for cmpxchg with key",
+                       .test = test_errors_cmpxchg_key,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "error checks for cmpxchg",
+                       .test = test_errors_cmpxchg,
+                       .requirements_met = extension_cap & 0x2,
+               },
+               {
+                       .name = "termination",
+                       .test = test_termination,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks with key storage protection override",
+                       .test = test_errors_key_storage_prot_override,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks without key fetch prot override",
+                       .test = test_errors_key_fetch_prot_override_not_enabled,
+                       .requirements_met = extension_cap > 0,
+               },
+               {
+                       .name = "error checks with key fetch prot override",
+                       .test = test_errors_key_fetch_prot_override_enabled,
+                       .requirements_met = extension_cap > 0,
+               },
+       };
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (testlist[idx].requirements_met) {
+                       testlist[idx].test();
+                       ksft_test_result_pass("%s\n", testlist[idx].name);
+               } else {
+                       ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
+                                             testlist[idx].name, extension_cap);
+               }
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c
new file mode 100644 (file)
index 0000000..b58f75b
--- /dev/null
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test for s390x CPU resets
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define LOCAL_IRQS 32
+
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
+
+static uint8_t regs_null[512];
+
+static void guest_code_initial(void)
+{
+       /* set several CRs to "safe" value */
+       unsigned long cr2_59 = 0x10;    /* enable guarded storage */
+       unsigned long cr8_63 = 0x1;     /* monitor mask = 1 */
+       unsigned long cr10 = 1;         /* PER START */
+       unsigned long cr11 = -1;        /* PER END */
+
+
+       /* Dirty registers */
+       asm volatile (
+               "       lghi    2,0x11\n"       /* Round toward 0 */
+               "       sfpc    2\n"            /* set fpc to !=0 */
+               "       lctlg   2,2,%0\n"
+               "       lctlg   8,8,%1\n"
+               "       lctlg   10,10,%2\n"
+               "       lctlg   11,11,%3\n"
+               /* now clobber some general purpose regs */
+               "       llihh   0,0xffff\n"
+               "       llihl   1,0x5555\n"
+               "       llilh   2,0xaaaa\n"
+               "       llill   3,0x0000\n"
+               /* now clobber a floating point reg */
+               "       lghi    4,0x1\n"
+               "       cdgbr   0,4\n"
+               /* now clobber an access reg */
+               "       sar     9,4\n"
+               /* We embed diag 501 here to control register content */
+               "       diag 0,0,0x501\n"
+               :
+               : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+               /* no clobber list as this should not return */
+               );
+}
+
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
+{
+       uint64_t eval_reg;
+
+       eval_reg = vcpu_get_reg(vcpu, id);
+       TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
+}
+
+static void assert_noirq(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_irq_state irq_state;
+       int irqs;
+
+       irq_state.len = sizeof(buf);
+       irq_state.buf = (unsigned long)buf;
+       irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
+       /*
+        * irqs contains the number of retrieved interrupts. Any interrupt
+        * (notably, the emergency call interrupt we have injected) should
+        * be cleared by the resets, so this should be 0.
+        */
+       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
+       TEST_ASSERT(!irqs, "IRQ pending");
+}
+
+static void assert_clear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       struct kvm_fpu fpu;
+
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
+
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
+
+       vcpu_fpu_get(vcpu, &fpu);
+       TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+       /* sync regs */
+       TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+                   "gprs0-15 == 0 (sync_regs)");
+
+       TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+                   "acrs0-15 == 0 (sync_regs)");
+
+       TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+                   "vrs0-15 == 0 (sync_regs)");
+}
+
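+/* Register values dirtied by the guest that a normal or initial reset must not clear. */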
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+       TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+                   "gpr0 == 0xffff000000000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+                   "gpr1 == 0x0000555500000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+                   "gpr2 == 0x00000000aaaa0000 (sync_regs)");
+       TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+                   "gpr3 == 0x0000000000000000 (sync_regs)");
+       TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+                   "fpr0 == 1.0 (sync_regs)");
+       TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
+}
+
+static void assert_initial(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+       struct kvm_sregs sregs;
+       struct kvm_fpu fpu;
+
+       /* KVM_GET_SREGS */
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+       TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+                   "cr14 == 0xC2000000 (KVM_GET_SREGS)");
+       TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
+                   "cr1-13 == 0 (KVM_GET_SREGS)");
+       TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+       /* sync regs */
+       TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+                   "cr14 == 0xC2000000 (sync_regs)");
+       TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
+                   "cr1-13 == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+       TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+       /* kvm_run */
+       TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+       TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+
+       vcpu_fpu_get(vcpu, &fpu);
+       TEST_ASSERT(!fpu.fpc, "fpc == 0");
+
+       test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+       test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+       test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+       test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+       test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
+}
+
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
+       TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+       TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
+static void assert_normal(struct kvm_vcpu *vcpu)
+{
+       test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+       TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
+                       "pft == 0xff.....  (sync_regs)");
+       assert_noirq(vcpu);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_irq_state irq_state;
+       struct kvm_s390_irq *irq = &buf[0];
+       int irqs;
+
+       /* Inject IRQ */
+       irq_state.len = sizeof(struct kvm_s390_irq);
+       irq_state.buf = (unsigned long)buf;
+       irq->type = KVM_S390_INT_EMERGENCY;
+       irq->u.emerg.code = vcpu->id;
+       irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
+       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
+}
+
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create(1);
+
+       *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+       return vm;
+}
+
+static void test_normal(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing normal reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       /* must not clear */
+       assert_normal_noclear(vcpu);
+       assert_initial_noclear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_initial(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing initial reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       assert_initial(vcpu);
+       /* must not clear */
+       assert_initial_noclear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_clear(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       ksft_print_msg("Testing clear reset\n");
+       vm = create_vm(&vcpu);
+
+       vcpu_run(vcpu);
+
+       inject_irq(vcpu);
+
+       vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
+
+       /* must clear */
+       assert_normal(vcpu);
+       assert_initial(vcpu);
+       assert_clear(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(void);
+       bool needs_cap;
+} testlist[] = {
+       { "initial", test_initial, false },
+       { "normal", test_normal, true },
+       { "clear", test_clear, true },
+};
+
+int main(int argc, char *argv[])
+{
+       bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+                       testlist[idx].test();
+                       ksft_test_result_pass("%s\n", testlist[idx].name);
+               } else {
+                       ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+                                             testlist[idx].name);
+               }
+       }
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
new file mode 100644 (file)
index 0000000..bba0d9a
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+       asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+       /* Issue some storage key instruction. */
+       set_storage_key((void *)0, 0x98);
+       GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+       struct page_region region;
+       struct pm_scan_arg arg = {
+               .start = (uintptr_t)addr,
+               .end = (uintptr_t)addr + 4096,
+               .vec = (uintptr_t)&region,
+               .vec_len = 1,
+               .size = sizeof(struct pm_scan_arg),
+               .category_mask = PAGE_IS_PFNZERO,
+               .category_anyof_mask = PAGE_IS_PRESENT,
+               .return_mask = PAGE_IS_PFNZERO,
+       };
+       return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+       char *mem, *page0, *page1, *page2, tmp;
+       const size_t pagesize = getpagesize();
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int pagemap_fd;
+
+       ksft_print_header();
+       ksft_set_plan(3);
+
+       /*
+        * We'll use memory that is not mapped into the VM for simplicity.
+        * Shared zeropages are enabled/disabled per-process.
+        */
+       mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+       TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+       /* Disable THP. Ignore errors on older kernels. */
+       madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+       page0 = mem;
+       page1 = page0 + pagesize;
+       page2 = page1 + pagesize;
+
+       /* Can we even detect shared zeropages? */
+       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+       TEST_REQUIRE(pagemap_fd >= 0);
+
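+       /*
+        * Touch the page with a read so the anonymous read-only mapping is
+        * backed by the shared zeropage; the empty asm prevents the compiler
+        * from optimizing the read away.
+        */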
+       tmp = *page0;
+       asm volatile("" : "+r" (tmp));
+       TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Verify that we get the shared zeropage after VM creation. */
+       tmp = *page1;
+       asm volatile("" : "+r" (tmp));
+       ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+                        "Shared zeropages should be enabled\n");
+
+       /*
+        * Let our VM execute a storage key instruction that should
+        * unshare all shared zeropages.
+        */
+       vcpu_run(vcpu);
+       get_ucall(vcpu, &uc);
+       TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+       /* Verify that we don't have a shared zeropage anymore. */
+       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+                        "Shared zeropage should be gone\n");
+
+       /* Verify that we don't get any new shared zeropages. */
+       tmp = *page2;
+       asm volatile("" : "+r" (tmp));
+       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+                        "Shared zeropages should be disabled\n");
+
+       kvm_vm_free(vm);
+
+       ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c
new file mode 100644 (file)
index 0000000..53def35
--- /dev/null
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptations for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "diag318_test_handler.h"
+#include "kselftest.h"
+
+static void guest_code(void)
+{
+       /*
+        * We embed diag 501 here instead of doing a ucall to make sure the
+        * compiler has not clobbered r11 by the time of the hypercall.
+        */
+       asm volatile (
+               "0:     diag 0,0,0x501\n"
+               "       ahi 11,1\n"
+               "       j 0b\n"
+       );
+}
+
+#define REG_COMPARE(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%llx, 0x%llx", \
+                   left->reg, right->reg)
+
+#define REG_COMPARE32(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%x, 0x%x", \
+                   left->reg, right->reg)
+
+
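+/* Compare the GPRs from KVM_GET_REGS against the sync_regs area. */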
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+       int i;
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE(gprs[i]);
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+       int i;
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE32(acrs[i]);
+
+       for (i = 0; i < 16; i++)
+               REG_COMPARE(crs[i]);
+}
+
+#undef REG_COMPARE
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
+#define INVALID_SYNC_FIELD 0x80000000
+
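+/* KVM_RUN must fail with EINVAL when kvm_valid_regs contains unknown bits. */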
+void test_read_invalid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request reading invalid register set from VCPU. */
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+}
+
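+/* KVM_RUN must fail with EINVAL when kvm_dirty_regs contains unknown bits. */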
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request setting invalid register set into VCPU. */
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       int rv;
+
+       /* Request and verify all valid register sets. */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+                   (run->s390_sieic.ipa >> 8) == 0x83 &&
+                   (run->s390_sieic.ipb >> 16) == 0x501,
+                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
+                   run->s390_sieic.icptcode, run->s390_sieic.ipa,
+                   run->s390_sieic.ipb);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+       int rv;
+
+       /* Set and verify various register values */
+       run->s.regs.gprs[11] = 0xBAD1DEA;
+       run->s.regs.acrs[0] = 1 << 11;
+
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+       if (get_diag318_info() > 0) {
+               run->s.regs.diag318 = get_diag318_info();
+               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+       }
+
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+                   "r11 sync regs value incorrect 0x%llx.",
+                   run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
+                   "acr0 sync regs value incorrect 0x%x.",
+                   run->s.regs.acrs[0]);
+       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Clear kvm_dirty_regs bits, verify new s.regs values are
+        * overwritten with existing guest values.
+        */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.gprs[11] = 0xDEADBEEF;
+       run->s.regs.diag318 = 0x4B1D;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+                   "r11 sync regs value incorrect 0x%llx.",
+                   run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
+}
+
+struct testdef {
+       const char *name;
+       void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+       { "read invalid", test_read_invalid },
+       { "set invalid", test_set_invalid },
+       { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+       { "set+verify various regs", test_set_and_verify_various_reg_values },
+       { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int idx;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+       ksft_print_header();
+
+       ksft_set_plan(ARRAY_SIZE(testlist));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test(vcpu);
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
new file mode 100644 (file)
index 0000000..12d5e1c
--- /dev/null
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+#include "processor.h"
+
+#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* A nonzero return value indicates that the address is not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+       int not_mapped = 0;
+
+       asm volatile (
+                      "lra     %[addr], 0(0,%[addr])\n"
+               "       jz      0f\n"
+               "       llill   %[not_mapped],1\n"
+               "       j       1f\n"
+               "0:     sske    %[key], %[addr]\n"
+               "1:"
+               : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+               : [key] "r" (key)
+               : "cc"
+       );
+       return -not_mapped;
+}
+
+enum permission {
+       READ_WRITE = 0,
+       READ = 1,
+       RW_PROTECTED = 2,
+       TRANSL_UNAVAIL = 3,
+};
+
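+/*
+ * Execute TEST PROTECTION on addr with the given access key and return the
+ * resulting condition code, which maps directly onto enum permission.
+ */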
+static enum permission test_protection(void *addr, uint8_t key)
+{
+       uint64_t mask;
+
+       asm volatile (
+                      "tprot   %[addr], 0(%[key])\n"
+               "       ipm     %[mask]\n"
+               : [mask] "=r" (mask)
+               : [addr] "Q" (*(char *)addr),
+                 [key] "a" (key)
+               : "cc"
+       );
+
+       return (enum permission)(mask >> 28);
+}
+
+enum stage {
+       STAGE_INIT_SIMPLE,
+       TEST_SIMPLE,
+       STAGE_INIT_FETCH_PROT_OVERRIDE,
+       TEST_FETCH_PROT_OVERRIDE,
+       TEST_STORAGE_PROT_OVERRIDE,
+       STAGE_END       /* must be the last entry (it's the number of tests) */
+};
+
+struct test {
+       enum stage stage;
+       void *addr;
+       uint8_t key;
+       enum permission expected;
+} tests[] = {
+       /*
+        * We perform each test in the array by executing TEST PROTECTION on
+        * the specified addr with the specified key and checking if the returned
+        * permissions match the expected value.
+        * Both guest and host cooperate to set up the required test conditions.
+        * A central condition is that the page targeted by addr has to be DAT
+        * protected in the host mappings, in order for KVM to emulate the
+        * TEST PROTECTION instruction.
+        * Since the page tables are shared, the host uses mprotect to achieve
+        * this.
+        *
+        * Tests resulting in RW_PROTECTED/TRANSL_UNAVAIL are interpreted
+        * by SIE, not KVM, but there is no harm in testing them as well.
+        * See "Enhanced Suppression-on-Protection Facilities in the
+        * Interpretive-Execution Mode".
+        */
+       /*
+        * guest: set storage key of page_store_prot to 1
+        *        storage key of page_fetch_prot to 9 and enable
+        *        protection for it
+        * STAGE_INIT_SIMPLE
+        * host: write protect both via mprotect
+        */
+       /* access key 0 matches any storage key -> RW */
+       { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+       /* access key matches storage key -> RW */
+       { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+       /* mismatched keys, but no fetch protection -> RO */
+       { TEST_SIMPLE, page_store_prot, 0x20, READ },
+       /* access key 0 matches any storage key -> RW */
+       { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+       /* access key matches storage key -> RW */
+       { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+       /* mismatched keys, fetch protection -> inaccessible */
+       { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+       /* page 0 not mapped yet -> translation not available */
+       { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+       /*
+        * host: try to map page 0
+        * guest: set storage key of page 0 to 9 and enable fetch protection
+        * STAGE_INIT_FETCH_PROT_OVERRIDE
+        * host: write protect page 0
+        *       enable fetch protection override
+        */
+       /* mismatched keys, fetch protection, but override applies -> RO */
+       { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+       /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+       { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+       /*
+        * host: enable storage protection override
+        */
+       /* mismatched keys, but override applies (storage key 9) -> RW */
+       { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+       /* mismatched keys, no fetch protection, override doesn't apply -> RO */
+       { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+       /* mismatched keys, but override applies (storage key 9) -> RW */
+       { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+       /* end marker */
+       { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+       enum stage stage = tests[*i].stage;
+       enum permission result;
+       bool skip;
+
+       for (; tests[*i].stage == stage; (*i)++) {
+               /*
+                * Some fetch protection override tests require that page 0
+                * be mapped. However, when the host tries to map that page via
+                * vm_vaddr_alloc, it may happen that some other page gets mapped
+                * instead.
+                * In order to skip these tests, we detect this inside the guest.
+                */
+               skip = tests[*i].addr < (void *)PAGE_SIZE &&
+                      tests[*i].expected != TRANSL_UNAVAIL &&
+                      !mapped_0;
+               if (!skip) {
+                       result = test_protection(tests[*i].addr, tests[*i].key);
+                       __GUEST_ASSERT(result == tests[*i].expected,
+                                      "Wanted %u, got %u, for i = %u",
+                                      tests[*i].expected, result, *i);
+               }
+       }
+       return stage;
+}
+
+static void guest_code(void)
+{
+       bool mapped_0;
+       int i = 0;
+
+       GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+       GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+       GUEST_SYNC(STAGE_INIT_SIMPLE);
+       GUEST_SYNC(perform_next_stage(&i, false));
+
+       /* Fetch-protection override */
+       mapped_0 = !set_storage_key((void *)0, 0x98);
+       GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+       GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+       /* Storage-protection override */
+       GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
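+/*
+ * Run the vCPU until the next GUEST_SYNC and assert that it reports the
+ * expected stage; unlike HOST_SYNC, no TAP result is emitted.
+ */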
+#define HOST_SYNC_NO_TAP(vcpup, stage)                         \
+({                                                             \
+       struct kvm_vcpu *__vcpu = (vcpup);                      \
+       struct ucall uc;                                        \
+       int __stage = (stage);                                  \
+                                                               \
+       vcpu_run(__vcpu);                                       \
+       get_ucall(__vcpu, &uc);                                 \
+       if (uc.cmd == UCALL_ABORT)                              \
+               REPORT_GUEST_ASSERT(uc);                        \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
+})
+
+#define HOST_SYNC(vcpu, stage)                 \
+({                                             \
+       HOST_SYNC_NO_TAP(vcpu, stage);          \
+       ksft_test_result_pass("" #stage "\n");  \
+})
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       vm_vaddr_t guest_0_page;
+
+       ksft_print_header();
+       ksft_set_plan(STAGE_END);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
+       mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+       HOST_SYNC(vcpu, TEST_SIMPLE);
+
+       guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+       if (guest_0_page != 0) {
+               /* Use NO_TAP so we don't get a PASS print */
+               HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+               ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+                                     "Did not allocate page at 0\n");
+       } else {
+               HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+       }
+       if (guest_0_page == 0)
+               mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+       run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+       run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
+
+       run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+       run->kvm_dirty_regs = KVM_SYNC_CRS;
+       HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
new file mode 100644 (file)
index 0000000..0c11231
--- /dev/null
@@ -0,0 +1,638 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test code for the s390x kvm ucontrol interface
+ *
+ * Copyright IBM Corp. 2024
+ *
+ * Authors:
+ *  Christoph Schlameuss <schlameuss@linux.ibm.com>
+ */
+#include "debug_print.h"
+#include "kselftest_harness.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sie.h"
+
+#include <linux/capability.h>
+#include <linux/sizes.h>
+
+#define PGM_SEGMENT_TRANSLATION 0x10
+
+#define VM_MEM_SIZE (4 * SZ_1M)
+#define VM_MEM_EXT_SIZE (2 * SZ_1M)
+#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
+
+/* Declare capget directly so we can check caps without libcap */
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/**
+ * In order to create user-controlled virtual machines on S390, check for
+ * KVM_CAP_S390_UCONTROL and use the KVM_VM_S390_UCONTROL flag as a
+ * privileged user (CAP_SYS_ADMIN).
+ */
+void require_ucontrol_admin(void)
+{
+       struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+       struct __user_cap_header_struct hdr = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+       };
+       int rc;
+
+       rc = capget(&hdr, data);
+       TEST_ASSERT_EQ(0, rc);
+       TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+}
+
+/* Test program setting some registers and looping */
+extern char test_gprs_asm[];
+asm("test_gprs_asm:\n"
+       "xgr    %r0, %r0\n"
+       "lgfi   %r1,1\n"
+       "lgfi   %r2,2\n"
+       "lgfi   %r3,3\n"
+       "lgfi   %r4,4\n"
+       "lgfi   %r5,5\n"
+       "lgfi   %r6,6\n"
+       "lgfi   %r7,7\n"
+       "0:\n"
+       "       diag    0,0,0x44\n"
+       "       ahi     %r0,1\n"
+       "       j       0b\n"
+);
+
+/* Test program manipulating memory */
+extern char test_mem_asm[];
+asm("test_mem_asm:\n"
+       "xgr    %r0, %r0\n"
+
+       "0:\n"
+       "       ahi     %r0,1\n"
+       "       st      %r1,0(%r5,%r6)\n"
+
+       "       xgr     %r1,%r1\n"
+       "       l       %r1,0(%r5,%r6)\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       j       0b\n"
+);
+
+/* Test program manipulating storage keys */
+extern char test_skey_asm[];
+asm("test_skey_asm:\n"
+       "xgr    %r0, %r0\n"
+
+       "0:\n"
+       "       ahi     %r0,1\n"
+       "       st      %r1,0(%r5,%r6)\n"
+
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       sske    %r1,%r6\n"
+       "       xgr     %r1,%r1\n"
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       rrbe    %r1,%r6\n"
+       "       iske    %r1,%r6\n"
+       "       ahi     %r0,1\n"
+       "       diag    0,0,0x44\n"
+
+       "       j       0b\n"
+);
+
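+/* Per-test state for the user-controlled (ucontrol) VM fixture. */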
+FIXTURE(uc_kvm)
+{
+       struct kvm_s390_sie_block *sie_block;
+       struct kvm_run *run;
+       uintptr_t base_gpa;
+       uintptr_t code_gpa;
+       uintptr_t base_hva;
+       uintptr_t code_hva;
+       int kvm_run_size;
+       vm_paddr_t pgd;
+       void *vm_mem;
+       int vcpu_fd;
+       int kvm_fd;
+       int vm_fd;
+};
+
+/**
+ * Create a VM with a single vCPU, and map kvm_run and the SIE control block
+ * for easy access.
+ */
+FIXTURE_SETUP(uc_kvm)
+{
+       struct kvm_s390_vm_cpu_processor info;
+       int rc;
+
+       require_ucontrol_admin();
+
+       self->kvm_fd = open_kvm_dev_path_or_exit();
+       self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+       ASSERT_GE(self->vm_fd, 0);
+
+       kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
+                           KVM_S390_VM_CPU_PROCESSOR, &info);
+       TH_LOG("create VM 0x%llx", info.cpuid);
+
+       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
+       ASSERT_GE(self->vcpu_fd, 0);
+
+       self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+       ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
+                 TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
+       self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
+                   PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
+       ASSERT_NE(self->run, MAP_FAILED);
+       /**
+        * For virtual cpus that have been created with S390 user controlled
+        * virtual machines, the resulting vcpu fd can be memory mapped at page
+        * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
+        * the virtual cpu's hardware control block.
+        */
+       self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
+       ASSERT_NE(self->sie_block, MAP_FAILED);
+
+       TH_LOG("VM created %p %p", self->run, self->sie_block);
+
+       self->base_gpa = 0;
+       self->code_gpa = self->base_gpa + (3 * SZ_1M);
+
+       self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
+       ASSERT_NE(NULL, self->vm_mem) TH_LOG("aligned_alloc failed %u", errno);
+       self->base_hva = (uintptr_t)self->vm_mem;
+       self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = self->base_hva,
+               .vcpu_addr = self->base_gpa,
+               .length = VM_MEM_SIZE,
+       };
+       TH_LOG("ucas map %p %p 0x%llx",
+              (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+       ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
+                               rc, strerror(errno));
+
+       TH_LOG("page in %p", (void *)self->base_gpa);
+       rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
+       ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
+                               (void *)self->base_hva, rc, strerror(errno));
+
+       self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
+}
+
+FIXTURE_TEARDOWN(uc_kvm)
+{
+       munmap(self->sie_block, PAGE_SIZE);
+       munmap(self->run, self->kvm_run_size);
+       close(self->vcpu_fd);
+       close(self->vm_fd);
+       close(self->kvm_fd);
+       free(self->vm_mem);
+}
+
+TEST_F(uc_kvm, uc_sie_assertions)
+{
+       /* assert interception of Code 08 (Program Interruption) is set */
+       EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
+}
+
+TEST_F(uc_kvm, uc_attr_mem_limit)
+{
+       u64 limit;
+       struct kvm_device_attr attr = {
+               .group = KVM_S390_VM_MEM_CTRL,
+               .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
+               .addr = (unsigned long)&limit,
+       };
+       int rc;
+
+       rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+       EXPECT_EQ(0, rc);
+       EXPECT_EQ(~0UL, limit);
+
+       /* assert set not supported */
+       rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_no_dirty_log)
+{
+       struct kvm_dirty_log dlog;
+       int rc;
+
+       rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+}
+
+/**
+ * Assert HPAGE CAP cannot be enabled on UCONTROL VM
+ */
+TEST(uc_cap_hpage)
+{
+       int rc, kvm_fd, vm_fd, vcpu_fd;
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_S390_HPAGE_1M,
+       };
+
+       require_ucontrol_admin();
+
+       kvm_fd = open_kvm_dev_path_or_exit();
+       vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
+       ASSERT_GE(vm_fd, 0);
+
+       /* assert hpages are not supported on ucontrol vm */
+       rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
+       EXPECT_EQ(0, rc);
+
+       /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
+       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EINVAL, errno);
+
+       /* assert HPAGE CAP is rejected after vCPU creation */
+       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+       ASSERT_GE(vcpu_fd, 0);
+       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
+       EXPECT_EQ(-1, rc);
+       EXPECT_EQ(EBUSY, errno);
+
+       close(vcpu_fd);
+       close(vm_fd);
+       close(kvm_fd);
+}
+
+/* calculate host virtual addr from guest physical addr */
+static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
+{
+       return (void *)(self->base_hva - self->base_gpa + gpa);
+}
+
+/* map / make additional memory available */
+static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = (u64)gpa2hva(self, vcpu_addr),
+               .vcpu_addr = vcpu_addr,
+               .length = length,
+       };
+       pr_info("ucas map %p %p 0x%llx",
+               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
+}
+
+/* unmap previously mapped memory */
+static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
+{
+       struct kvm_s390_ucas_mapping map = {
+               .user_addr = (u64)gpa2hva(self, vcpu_addr),
+               .vcpu_addr = vcpu_addr,
+               .length = length,
+       };
+       pr_info("ucas unmap %p %p 0x%llx",
+               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
+       return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
+}
+
+/* handle ucontrol exit by mapping the accessed segment */
+static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_run *run = self->run;
+       u64 seg_addr;
+       int rc;
+
+       TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+       switch (run->s390_ucontrol.pgm_code) {
+       case PGM_SEGMENT_TRANSLATION:
+               seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
+               pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
+                       run->s390_ucontrol.trans_exc_code, seg_addr);
+               /* map / make additional memory available */
+               rc = uc_map_ext(self, seg_addr, SZ_1M);
+               TEST_ASSERT_EQ(0, rc);
+               break;
+       default:
+               TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
+       }
+}
+
+/*
+ * Enable storage key handling for the guest by clearing the KSS cpuflag and
+ * disabling interception of the storage key instructions.
+ */
+static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+       /* disable KSS */
+       sie_block->cpuflags &= ~CPUSTAT_KSS;
+       /* disable skey inst interception */
+       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+/*
+ * Handle the instruction intercept.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       int ilen = insn_length(sie_block->ipa >> 8);
+       struct kvm_run *run = self->run;
+
+       switch (run->s390_sieic.ipa) {
+       case 0xB229: /* ISKE */
+       case 0xB22b: /* SSKE */
+       case 0xB22a: /* RRBE */
+               uc_skey_enable(self);
+
+               /* rewind to reexecute intercepted instruction */
+               run->psw_addr = run->psw_addr - ilen;
+               pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Handle the SIEIC exit.
+ * Fails on interception codes not expected in the test cases.
+ * Returns true if the interception was handled and execution can continue.
+ */
+static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       struct kvm_run *run = self->run;
+
+       /* check SIE interception code */
+       pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
+               run->s390_sieic.icptcode,
+               run->s390_sieic.ipa,
+               run->s390_sieic.ipb);
+       switch (run->s390_sieic.icptcode) {
+       case ICPT_INST:
+               /* end execution in caller on intercepted instruction */
+               pr_info("sie instruction interception\n");
+               return uc_handle_insn_ic(self);
+       case ICPT_KSS:
+               uc_skey_enable(self);
+               return true;
+       case ICPT_OPEREXC:
+               /* operation exception */
+               TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
+       default:
+               TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
+       }
+       return true;
+}
+
+/* verify VM state on exit */
+static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_run *run = self->run;
+
+       switch (run->exit_reason) {
+       case KVM_EXIT_S390_UCONTROL:
+               /*
+                * Check the program interruption code and handle a page
+                * fault by doing a ucas map.
+                */
+               uc_handle_exit_ucontrol(self);
+               break;
+       case KVM_EXIT_S390_SIEIC:
+               return uc_handle_sieic(self);
+       default:
+               pr_info("exit_reason %2d not handled\n", run->exit_reason);
+       }
+       return true;
+}
+
+/* run the VM until interrupted */
+static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
+{
+       int rc;
+
+       rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
+       print_run(self->run, self->sie_block);
+       print_regs(self->run);
+       pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
+       return rc;
+}
+
+static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+
+       /* assert vm was interrupted by diag 0x0044 */
+       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+       TEST_ASSERT_EQ(0x8300, sie_block->ipa);
+       TEST_ASSERT_EQ(0x440000, sie_block->ipb);
+}
+
+TEST_F(uc_kvm, uc_no_user_region)
+{
+       struct kvm_userspace_memory_region region = {
+               .slot = 1,
+               .guest_phys_addr = self->code_gpa,
+               .memory_size = VM_MEM_EXT_SIZE,
+               .userspace_addr = (uintptr_t)self->code_hva,
+       };
+       struct kvm_userspace_memory_region2 region2 = {
+               .slot = 1,
+               .guest_phys_addr = self->code_gpa,
+               .memory_size = VM_MEM_EXT_SIZE,
+               .userspace_addr = (uintptr_t)self->code_hva,
+       };
+
+       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
+       ASSERT_EQ(EINVAL, errno);
+
+       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
+       ASSERT_EQ(EINVAL, errno);
+}
+
+TEST_F(uc_kvm, uc_map_unmap)
+{
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       struct kvm_run *run = self->run;
+       const u64 disp = 1;
+       int rc;
+
+       /* copy test_mem_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       /* set register content for test_mem_asm to access unmapped memory */
+       sync_regs->gprs[1] = 0x55;
+       sync_regs->gprs[5] = self->base_gpa;
+       sync_regs->gprs[6] = VM_MEM_SIZE + disp;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* run and expect to fail with ucontrol pic segment translation */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+
+       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+       ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
+
+       /* fail to map memory at an address that is not segment aligned */
+       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
+       ASSERT_GT(0, rc)
+               TH_LOG("ucas map for non segment address should fail but didn't; "
+                      "result %d not expected, %s", rc, strerror(errno));
+
+       /* map / make additional memory available */
+       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+       ASSERT_EQ(0, rc)
+               TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* assert registers and memory are in expected state */
+       ASSERT_EQ(2, sync_regs->gprs[0]);
+       ASSERT_EQ(0x55, sync_regs->gprs[1]);
+       ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
+
+       /* unmap and run loop again */
+       rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
+       ASSERT_EQ(0, rc)
+               TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(3, sync_regs->gprs[0]);
+       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
+       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
+       /* handle ucontrol exit and remap memory after previous map and unmap */
+       ASSERT_EQ(true, uc_handle_exit(self));
+}
+
+TEST_F(uc_kvm, uc_gprs)
+{
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       struct kvm_run *run = self->run;
+       struct kvm_regs regs = {};
+
+       /* Set registers to values that are different from the ones that we expect below */
+       for (int i = 0; i < 8; i++)
+               sync_regs->gprs[i] = 8;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* copy test_gprs_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       /* run and expect interception of diag 44 */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* Retrieve and check guest register values */
+       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+       for (int i = 0; i < 8; i++) {
+               ASSERT_EQ(i, regs.gprs[i]);
+               ASSERT_EQ(i, sync_regs->gprs[i]);
+       }
+
+       /* run and expect interception of diag 44 again */
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       uc_assert_diag44(self);
+
+       /* check continued increment of register 0 value */
+       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
+       ASSERT_EQ(1, regs.gprs[0]);
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+}
+
+TEST_F(uc_kvm, uc_skey)
+{
+       struct kvm_s390_sie_block *sie_block = self->sie_block;
+       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
+       u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
+       struct kvm_run *run = self->run;
+       const u8 skeyvalue = 0x34;
+
+       /* copy test_skey_asm to code_hva / code_gpa */
+       TH_LOG("copy code %p to vm mapped memory %p / %p",
+              &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
+       memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
+
+       /* set register content for test_skey_asm to access memory at test_vaddr */
+       sync_regs->gprs[1] = skeyvalue;
+       sync_regs->gprs[5] = self->base_gpa;
+       sync_regs->gprs[6] = test_vaddr;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+
+       /* DAT disabled + 64 bit mode */
+       run->psw_mask = 0x0000000180000000ULL;
+       run->psw_addr = self->code_gpa;
+
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(true, uc_handle_exit(self));
+       ASSERT_EQ(1, sync_regs->gprs[0]);
+
+       /* ISKE */
+       ASSERT_EQ(0, uc_run_once(self));
+
+       /*
+        * Bail out and skip the test if uc_skey_enable was executed but ISKE
+        * is still intercepted. Such instructions are not handled by the
+        * kernel, so there is no need to test this here.
+        */
+       TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
+       TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
+       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
+       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
+       TEST_REQUIRE(sie_block->ipa != 0xb229);
+
+       /* ISKE contd. */
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(2, sync_regs->gprs[0]);
+       /* assert initial skey (ACC = 0, R & C = 1) */
+       ASSERT_EQ(0x06, sync_regs->gprs[1]);
+       uc_assert_diag44(self);
+
+       /* SSKE + ISKE */
+       sync_regs->gprs[1] = skeyvalue;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(3, sync_regs->gprs[0]);
+       ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
+       uc_assert_diag44(self);
+
+       /* RRBE + ISKE */
+       sync_regs->gprs[1] = skeyvalue;
+       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+       ASSERT_EQ(0, uc_run_once(self));
+       ASSERT_EQ(false, uc_handle_exit(self));
+       ASSERT_EQ(4, sync_regs->gprs[0]);
+       /* assert R reset but rest of skey unchanged */
+       ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
+       ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
+       uc_assert_diag44(self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
deleted file mode 100644 (file)
index e32dd59..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x CMMA migration
- *
- * Copyright IBM Corp. 2023
- *
- * Authors:
- *  Nico Boehr <nrb@linux.ibm.com>
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define MAIN_PAGE_COUNT 512
-
-#define TEST_DATA_PAGE_COUNT 512
-#define TEST_DATA_MEMSLOT 1
-#define TEST_DATA_START_GFN PAGE_SIZE
-
-#define TEST_DATA_TWO_PAGE_COUNT 256
-#define TEST_DATA_TWO_MEMSLOT 2
-#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE)
-
-static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
-
-/**
- * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
- * so use_cmma goes on and the CMMA related ioctls do something.
- */
-static void guest_do_one_essa(void)
-{
-       asm volatile(
-               /* load TEST_DATA_START_GFN into r1 */
-               "       llilf 1,%[start_gfn]\n"
-               /* calculate the address from the gfn */
-               "       sllg 1,1,12(0)\n"
-               /* set the first page in TEST_DATA memslot to STABLE */
-               "       .insn rrf,0xb9ab0000,2,1,1,0\n"
-               /* hypercall */
-               "       diag 0,0,0x501\n"
-               "0:     j 0b"
-               :
-               : [start_gfn] "L"(TEST_DATA_START_GFN)
-               : "r1", "r2", "memory", "cc"
-       );
-}
-
-/**
- * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
- * state.
- */
-static void guest_dirty_test_data(void)
-{
-       asm volatile(
-               /* r1 = TEST_DATA_START_GFN */
-               "       xgr 1,1\n"
-               "       llilf 1,%[start_gfn]\n"
-               /* r5 = TEST_DATA_PAGE_COUNT */
-               "       lghi 5,%[page_count]\n"
-               /* r5 += r1 */
-               "2:     agfr 5,1\n"
-               /* r2 = r1 << PAGE_SHIFT */
-               "1:     sllg 2,1,12(0)\n"
-               /* essa(r4, r2, SET_STABLE) */
-               "       .insn rrf,0xb9ab0000,4,2,1,0\n"
-               /* i++ */
-               "       agfi 1,1\n"
-               /* if r1 < r5 goto 1 */
-               "       cgrjl 1,5,1b\n"
-               /* hypercall */
-               "       diag 0,0,0x501\n"
-               "0:     j 0b"
-               :
-               : [start_gfn] "L"(TEST_DATA_START_GFN),
-                 [page_count] "L"(TEST_DATA_PAGE_COUNT)
-               :
-                       /* the counter in our loop over the pages */
-                       "r1",
-                       /* the calculated page physical address */
-                       "r2",
-                       /* ESSA output register */
-                       "r4",
-                       /* last page */
-                       "r5",
-                       "cc", "memory"
-       );
-}
-
-static void create_main_memslot(struct kvm_vm *vm)
-{
-       int i;
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
-       /* set the array of memslots to zero like __vm_create does */
-       for (i = 0; i < NR_MEM_REGIONS; i++)
-               vm->memslots[i] = 0;
-}
-
-static void create_test_memslot(struct kvm_vm *vm)
-{
-       vm_userspace_mem_region_add(vm,
-                                   VM_MEM_SRC_ANONYMOUS,
-                                   TEST_DATA_START_GFN << vm->page_shift,
-                                   TEST_DATA_MEMSLOT,
-                                   TEST_DATA_PAGE_COUNT,
-                                   0
-                                  );
-       vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
-}
-
-static void create_memslots(struct kvm_vm *vm)
-{
-       /*
-        * Our VM has the following memory layout:
-        * +------+---------------------------+
-        * | GFN  | Memslot                   |
-        * +------+---------------------------+
-        * | 0    |                           |
-        * | ...  | MAIN (Code, Stack, ...)   |
-        * | 511  |                           |
-        * +------+---------------------------+
-        * | 4096 |                           |
-        * | ...  | TEST_DATA                 |
-        * | 4607 |                           |
-        * +------+---------------------------+
-        */
-       create_main_memslot(vm);
-       create_test_memslot(vm);
-}
-
-static void finish_vm_setup(struct kvm_vm *vm)
-{
-       struct userspace_mem_region *slot0;
-
-       kvm_vm_elf_load(vm, program_invocation_name);
-
-       slot0 = memslot2region(vm, 0);
-       ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
-
-       kvm_arch_vm_post_create(vm);
-}
-
-static struct kvm_vm *create_vm_two_memslots(void)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_barebones();
-
-       create_memslots(vm);
-
-       finish_vm_setup(vm);
-
-       return vm;
-}
-
-static void enable_cmma(struct kvm_vm *vm)
-{
-       int r;
-
-       r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
-       TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
-}
-
-static void enable_dirty_tracking(struct kvm_vm *vm)
-{
-       vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
-       vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
-}
-
-static int __enable_migration_mode(struct kvm_vm *vm)
-{
-       return __kvm_device_attr_set(vm->fd,
-                                    KVM_S390_VM_MIGRATION,
-                                    KVM_S390_VM_MIGRATION_START,
-                                    NULL
-                                   );
-}
-
-static void enable_migration_mode(struct kvm_vm *vm)
-{
-       int r = __enable_migration_mode(vm);
-
-       TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
-}
-
-static bool is_migration_mode_on(struct kvm_vm *vm)
-{
-       u64 out;
-       int r;
-
-       r = __kvm_device_attr_get(vm->fd,
-                                 KVM_S390_VM_MIGRATION,
-                                 KVM_S390_VM_MIGRATION_STATUS,
-                                 &out
-                                );
-       TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
-       return out;
-}
-
-static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
-{
-       struct kvm_s390_cmma_log args;
-       int rc;
-
-       errno = 0;
-
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = flags,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-
-       *errno_out = errno;
-       return rc;
-}
-
-static void test_get_cmma_basic(void)
-{
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_vcpu *vcpu;
-       int rc, errno_out;
-
-       /* GET_CMMA_BITS without CMMA enabled should fail */
-       rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, ENXIO);
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
-       vcpu_run(vcpu);
-
-       /* GET_CMMA_BITS without migration mode and without peeking should fail */
-       rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, EINVAL);
-
-       /* GET_CMMA_BITS without migration mode and with peeking should work */
-       rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT_EQ(errno_out, 0);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       /* GET_CMMA_BITS with invalid flags */
-       rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno_out, EINVAL);
-
-       kvm_vm_free(vm);
-}
-
-static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
-{
-       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
-}
-
-static void test_migration_mode(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       struct kvm_vcpu *vcpu;
-       u64 orig_psw;
-       int rc;
-
-       /* enabling migration mode on a VM without memory should fail */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno, EINVAL);
-       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
-       errno = 0;
-
-       create_memslots(vm);
-       finish_vm_setup(vm);
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-       orig_psw = vcpu->run->psw_addr;
-
-       /*
-        * Execute one essa instruction in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /* migration mode when memslots have dirty tracking off should fail */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, -1);
-       TEST_ASSERT_EQ(errno, EINVAL);
-       TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
-       errno = 0;
-
-       /* enable dirty tracking */
-       enable_dirty_tracking(vm);
-
-       /* enabling migration mode should work now */
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       errno = 0;
-
-       /* execute another ESSA instruction to see this goes fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /*
-        * With migration mode on, create a new memslot with dirty tracking off.
-        * This should turn off migration mode.
-        */
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       vm_userspace_mem_region_add(vm,
-                                   VM_MEM_SRC_ANONYMOUS,
-                                   TEST_DATA_TWO_START_GFN << vm->page_shift,
-                                   TEST_DATA_TWO_MEMSLOT,
-                                   TEST_DATA_TWO_PAGE_COUNT,
-                                   0
-                                  );
-       TEST_ASSERT(!is_migration_mode_on(vm),
-                   "creating memslot without dirty tracking turns off migration mode"
-                  );
-
-       /* ESSA instructions should still execute fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       /*
-        * Turn on dirty tracking on the new memslot.
-        * It should be possible to turn migration mode back on again.
-        */
-       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
-       rc = __enable_migration_mode(vm);
-       TEST_ASSERT_EQ(rc, 0);
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       errno = 0;
-
-       /*
-        * Turn off dirty tracking again, this time with just a flag change.
-        * Again, migration mode should turn off.
-        */
-       TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
-       vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
-       TEST_ASSERT(!is_migration_mode_on(vm),
-                   "disabling dirty tracking should turn off migration mode"
-                  );
-
-       /* ESSA instructions should still execute fine */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-/**
- * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
- * CMMA attributes of all pages in both memslots and nothing more dirty.
- * This has the useful side effect of ensuring nothing is CMMA dirty after this
- * function.
- */
-static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
-{
-       struct kvm_s390_cmma_log args;
-
-       /*
-        * First iteration - everything should be dirty.
-        * Start at the main memslot...
-        */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.start_gfn, 0);
-
-       /* ...and then - after a hole - the TEST_DATA memslot should follow */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = MAIN_PAGE_COUNT,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
-       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
-       TEST_ASSERT_EQ(args.remaining, 0);
-
-       /* ...and nothing else should be there */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       TEST_ASSERT_EQ(args.count, 0);
-       TEST_ASSERT_EQ(args.start_gfn, 0);
-       TEST_ASSERT_EQ(args.remaining, 0);
-}
-
-/**
- * Given a VM, assert no pages are CMMA dirty.
- */
-static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
-{
-       struct kvm_s390_cmma_log args;
-
-       /* If we start from GFN 0 again, nothing should be dirty. */
-       args = (struct kvm_s390_cmma_log){
-               .start_gfn = 0,
-               .count = sizeof(cmma_value_buf),
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       if (args.count || args.remaining || args.start_gfn)
-               TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
-                         args.start_gfn,
-                         args.count,
-                         args.remaining
-                        );
-}
-
-static void test_get_inital_dirty(void)
-{
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_vcpu *vcpu;
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
-
-       /*
-        * Execute one essa instruction in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       assert_all_slots_cmma_dirty(vm);
-
-       /* Start from the beginning again and make sure nothing else is dirty */
-       assert_no_pages_cmma_dirty(vm);
-
-       kvm_vm_free(vm);
-}
-
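-/*
- * Issue KVM_S390_GET_CMMA_BITS for gfn_count pages starting at start_gfn and
- * store the raw ioctl result in *res_out. The CMMA values land in
- * cmma_value_buf, which is pre-filled with 0xff so untouched entries stand out.
- */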
-static void query_cmma_range(struct kvm_vm *vm,
-                            u64 start_gfn, u64 gfn_count,
-                            struct kvm_s390_cmma_log *res_out)
-{
-       *res_out = (struct kvm_s390_cmma_log){
-               .start_gfn = start_gfn,
-               .count = gfn_count,
-               .flags = 0,
-               .values = (__u64)&cmma_value_buf[0]
-       };
-       memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
-       vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
-}
-
-/**
- * Assert the given cmma_log struct that was executed by query_cmma_range()
- * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
- * dirty_gfn_count CMMA values.
- */
-static void assert_cmma_dirty(u64 first_dirty_gfn,
-                             u64 dirty_gfn_count,
-                             const struct kvm_s390_cmma_log *res)
-{
-       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
-       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
-       for (size_t i = 0; i < dirty_gfn_count; i++)
-               TEST_ASSERT_EQ(cmma_value_buf[i], 0x0); /* stable state */
-       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
-}
-
-static void test_get_skip_holes(void)
-{
-       size_t gfn_offset;
-       struct kvm_vm *vm = create_vm_two_memslots();
-       struct kvm_s390_cmma_log log;
-       struct kvm_vcpu *vcpu;
-       u64 orig_psw;
-
-       enable_cmma(vm);
-       vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
-
-       orig_psw = vcpu->run->psw_addr;
-
-       /*
-        * Execute some essa instructions in the guest. Otherwise the guest will
-        * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
-        */
-       vcpu_run(vcpu);
-       assert_exit_was_hypercall(vcpu);
-
-       enable_dirty_tracking(vm);
-       enable_migration_mode(vm);
-
-       /* un-dirty all pages */
-       assert_all_slots_cmma_dirty(vm);
-
-       /* Then, dirty just the TEST_DATA memslot */
-       vcpu->run->psw_addr = orig_psw;
-       vcpu_run(vcpu);
-
-       gfn_offset = TEST_DATA_START_GFN;
-       /**
-        * Query CMMA attributes of one page, starting at page 0. Since the
-        * main memslot was not touched by the VM, this should yield the first
-        * page of the TEST_DATA memslot.
-        * The dirty bitmap should now look like this:
-        * 0: not dirty
-        * [0x1, 0x200): dirty
-        */
-       query_cmma_range(vm, 0, 1, &log);
-       assert_cmma_dirty(gfn_offset, 1, &log);
-       gfn_offset++;
-
-       /**
-        * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
-        * memslot. This should wrap back to the beginning of the TEST_DATA
-        * memslot, page 1.
-        * The dirty bitmap should now look like this:
-        * [0, 0x21): not dirty
-        * [0x21, 0x200): dirty
-        */
-       query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
-       assert_cmma_dirty(gfn_offset, 0x20, &log);
-       gfn_offset += 0x20;
-
-       /* Skip 32 pages */
-       gfn_offset += 0x20;
-
-       /**
-        * After skipping 32 pages, query the next 32 (0x20) pages.
-        * The dirty bitmap should now look like this:
-        * [0, 0x21): not dirty
-        * [0x21, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       query_cmma_range(vm, gfn_offset, 0x20, &log);
-       assert_cmma_dirty(gfn_offset, 0x20, &log);
-       gfn_offset += 0x20;
-
-       /**
-        * Query 1 page from the beginning of the TEST_DATA memslot. This should
-        * yield page 0x21.
-        * The dirty bitmap should now look like this:
-        * [0, 0x22): not dirty
-        * [0x22, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
-       assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
-       gfn_offset++;
-
-       /**
-        * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
-        * This should yield pages [0x23, 0x33).
-        * The dirty bitmap should now look like this:
-        * [0, 0x22): not dirty
-        * 0x22: dirty
-        * [0x23, 0x33): not dirty
-        * [0x33, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x23;
-       query_cmma_range(vm, gfn_offset, 15, &log);
-       assert_cmma_dirty(gfn_offset, 15, &log);
-
-       /**
-        * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
- * This should yield pages [0x22, 0x33).
-        * The dirty bitmap should now look like this:
-        * [0, 0x33): not dirty
-        * [0x33, 0x41): dirty
-        * [0x41, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x22;
-       query_cmma_range(vm, gfn_offset, 17, &log);
-       assert_cmma_dirty(gfn_offset, 17, &log);
-
-       /**
-        * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
-        * This should yield page 0x40 and nothing more, since there are more
-        * than 16 non-dirty pages after page 0x40.
-        * The dirty bitmap should now look like this:
-        * [0, 0x33): not dirty
-        * [0x33, 0x40): dirty
-        * [0x40, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x40;
-       query_cmma_range(vm, gfn_offset, 25, &log);
-       assert_cmma_dirty(gfn_offset, 1, &log);
-
-       /**
-        * Query pages [0x33, 0x40).
-        * The dirty bitmap should now look like this:
-        * [0, 0x61): not dirty
-        * [0x61, 0x200): dirty
-        */
-       gfn_offset = TEST_DATA_START_GFN + 0x33;
-       query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
-       assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
-
-       /**
-        * Query the remaining pages [0x61, 0x200).
-        */
-       gfn_offset = TEST_DATA_START_GFN;
-       query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
-       assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
-
-       assert_no_pages_cmma_dirty(vm);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(void);
-} testlist[] = {
-       { "migration mode and dirty tracking", test_migration_mode },
-       { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
-       { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
-       { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
-};
-
-/**
- * The kernel may support CMMA, but the machine may not (i.e. if running as
- * guest-3).
- *
- * In this case, the CMMA capabilities are all there, but the CMMA-related
- * ioctls fail. To find out whether the machine supports CMMA, create a
- * temporary VM and then query the CMMA feature of the VM.
- */
-static int machine_has_cmma(void)
-{
-       struct kvm_vm *vm = vm_create_barebones();
-       int r;
-
-       r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
-       kvm_vm_free(vm);
-
-       return r;
-}
-
-int main(int argc, char *argv[])
-{
-       int idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
-       TEST_REQUIRE(machine_has_cmma());
-
-       ksft_print_header();
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test();
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config
deleted file mode 100644 (file)
index 23270f2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_KVM=y
-CONFIG_KVM_S390_UCONTROL=y
diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c
deleted file mode 100644 (file)
index 2725588..0000000
+++ /dev/null
@@ -1,301 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Hariharan Mari <hari55@linux.ibm.com>
- *
- * The tests compare the results of the KVM ioctl for obtaining CPU subfunction data with those
- * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction
- * query data reported via the CPU Model, allowing us to directly compare it with the data
- * acquired through executing the queries in the test.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include "facility.h"
-
-#include "kvm_util.h"
-
-#define PLO_FUNCTION_MAX 256
-
-/* Query available CPU subfunctions */
-struct kvm_s390_vm_cpu_subfunc cpu_subfunc;
-
-static void get_cpu_machine_subfunctions(struct kvm_vm *vm,
-                                       struct kvm_s390_vm_cpu_subfunc *cpu_subfunc)
-{
-       int r;
-
-       r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL,
-                                 KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc);
-
-       TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno);
-}
-
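-/*
- * Check whether PLO function number @nr is installed by executing PLO with
- * the test-bit flag (0x100) set; condition code 0 means it is available.
- */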
-static inline int plo_test_bit(unsigned char nr)
-{
-       unsigned long function = nr | 0x100;
-       int cc;
-
-       asm volatile("  lgr     0,%[function]\n"
-                       /* Parameter registers are ignored for "test bit" */
-                       "       plo     0,0,0,0(0)\n"
-                       "       ipm     %0\n"
-                       "       srl     %0,28\n"
-                       : "=d" (cc)
-                       : [function] "d" (function)
-                       : "cc", "0");
-       return cc == 0;
-}
-
-/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */
-static void test_plo_asm_block(u8 (*query)[32])
-{
-       for (int i = 0; i < PLO_FUNCTION_MAX; ++i) {
-               if (plo_test_bit(i))
-                       (*query)[i >> 3] |= 0x80 >> (i & 7);
-       }
-}
-
-/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */
-static void test_kmac_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb91e0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */
-static void test_kmc_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92f0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */
-static void test_km_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92e0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */
-static void test_kimd_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93e0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */
-static void test_klmd_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93f0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */
-static void test_kmctr_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rrf,0xb92d0000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */
-static void test_kmf_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92a0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */
-static void test_kmo_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92b0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */
-static void test_pcc_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb92c0000,0,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */
-static void test_prno_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93c0000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */
-static void test_kma_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rrf,0xb9290000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */
-static void test_kdsa_asm_block(u8 (*query)[16])
-{
-       asm volatile("  la      %%r1,%[query]\n"
-                       "       xgr     %%r0,%%r0\n"
-                       "       .insn   rre,0xb93a0000,0,2\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "r0", "r1");
-}
-
-/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */
-static void test_sortl_asm_block(u8 (*query)[32])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       la      1,%[query]\n"
-                       "       .insn   rre,0xb9380000,2,4\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "0", "1");
-}
-
-/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */
-static void test_dfltcc_asm_block(u8 (*query)[32])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       la      1,%[query]\n"
-                       "       .insn   rrf,0xb9390000,2,4,6,0\n"
-                       : [query] "=R" (*query)
-                       :
-                       : "cc", "0", "1");
-}
-
-/*
- * Testing Perform Function with Concurrent Results (PFCR)
- * CPU subfunction's ASM block
- */
-static void test_pfcr_asm_block(u8 (*query)[16])
-{
-       asm volatile("  lghi    0,0\n"
-                       "       .insn   rsy,0xeb0000000016,0,0,%[query]\n"
-                       : [query] "=QS" (*query)
-                       :
-                       : "cc", "0");
-}
-
-typedef void (*testfunc_t)(u8 (*array)[]);
-
-struct testdef {
-       const char *subfunc_name;
-       u8 *subfunc_array;
-       size_t array_size;
-       testfunc_t test;
-       int facility_bit;
-} testlist[] = {
-       /*
-        * PLO was introduced in the very first 64-bit machine generation.
-        * Hence it is assumed PLO is always installed in Z Arch.
-        */
-       { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 },
-       /* MSA - Facility bit 17 */
-       { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 },
-       { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 },
-       { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 },
-       { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 },
-       { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 },
-       /* MSA - Facility bit 77 */
-       { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 },
-       { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 },
-       { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 },
-       { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 },
-       /* MSA5 - Facility bit 57 */
-       { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 },
-       /* MSA8 - Facility bit 146 */
-       { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 },
-       /* MSA9 - Facility bit 155 */
-       { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 },
-       /* SORTL - Facility bit 150 */
-       { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 },
-       /* DFLTCC - Facility bit 151 */
-       { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 },
-       /* Concurrent-function facility - Facility bit 201 */
-       { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 },
-};
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       int idx;
-
-       ksft_print_header();
-
-       vm = vm_create(1);
-
-       memset(&cpu_subfunc, 0, sizeof(cpu_subfunc));
-       get_cpu_machine_subfunctions(vm, &cpu_subfunc);
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (test_facility(testlist[idx].facility_bit)) {
-                       u8 *array = malloc(testlist[idx].array_size);
-
-                       testlist[idx].test((u8 (*)[testlist[idx].array_size])array);
-
-                       TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array,
-                                             array, testlist[idx].array_size), 0);
-
-                       ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
-                       free(array);
-               } else {
-                       ksft_test_result_skip("%s feature is not available\n",
-                                             testlist[idx].subfunc_name);
-               }
-       }
-
-       kvm_vm_free(vm);
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
deleted file mode 100644 (file)
index ad80959..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Test KVM debugging features. */
-#include "kvm_util.h"
-#include "test_util.h"
-#include "sie.h"
-
-#include <linux/kvm.h>
-
-#define __LC_SVC_NEW_PSW 0x1c0
-#define __LC_PGM_NEW_PSW 0x1d0
-#define IPA0_DIAG 0x8300
-#define PGM_SPECIFICATION 0x06
-
-/* Common code for testing single-stepping interruptions. */
-extern char int_handler[];
-asm("int_handler:\n"
-    "j .\n");
-
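-/*
- * Create a VM with one vCPU running guest_code, install int_handler as the
- * interruption new PSW at new_psw_off in the lowcore, enable single-stepping
- * and run the vCPU once; new_psw receives the PSW the interruption loads.
- */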
-static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
-                                     size_t new_psw_off, uint64_t *new_psw)
-{
-       struct kvm_guest_debug debug = {};
-       struct kvm_regs regs;
-       struct kvm_vm *vm;
-       char *lowcore;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       lowcore = addr_gpa2hva(vm, 0);
-       new_psw[0] = (*vcpu)->run->psw_mask;
-       new_psw[1] = (uint64_t)int_handler;
-       memcpy(lowcore + new_psw_off, new_psw, 16);
-       vcpu_regs_get(*vcpu, &regs);
-       regs.gprs[2] = -1;
-       vcpu_regs_set(*vcpu, &regs);
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
-       vcpu_guest_debug_set(*vcpu, &debug);
-       vcpu_run(*vcpu);
-
-       return vm;
-}
-
-static void test_step_int(void *guest_code, size_t new_psw_off)
-{
-       struct kvm_vcpu *vcpu;
-       uint64_t new_psw[2];
-       struct kvm_vm *vm;
-
-       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
-       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
-       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
-       kvm_vm_free(vm);
-}
-
-/* Test single-stepping "boring" program interruptions. */
-extern char test_step_pgm_guest_code[];
-asm("test_step_pgm_guest_code:\n"
-    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
-    "j .\n");
-
-static void test_step_pgm(void)
-{
-       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by DIAG.
- * Userspace emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_diag_guest_code[];
-asm("test_step_pgm_diag_guest_code:\n"
-    "diag %r0,%r0,0\n"
-    "j .\n");
-
-static void test_step_pgm_diag(void)
-{
-       struct kvm_s390_irq irq = {
-               .type = KVM_S390_PROGRAM_INT,
-               .u.pgm.code = PGM_SPECIFICATION,
-       };
-       struct kvm_vcpu *vcpu;
-       uint64_t new_psw[2];
-       struct kvm_vm *vm;
-
-       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
-                            __LC_PGM_NEW_PSW, new_psw);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST);
-       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
-       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
-       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
-       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
-       kvm_vm_free(vm);
-}
-
-/*
- * Test single-stepping program interruptions caused by ISKE.
- * CPUSTAT_KSS handling must not interfere with single-stepping.
- */
-extern char test_step_pgm_iske_guest_code[];
-asm("test_step_pgm_iske_guest_code:\n"
-    "iske %r2,%r2\n"
-    "j .\n");
-
-static void test_step_pgm_iske(void)
-{
-       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/*
- * Test single-stepping program interruptions caused by LCTL.
- * KVM emulation must not interfere with single-stepping.
- */
-extern char test_step_pgm_lctl_guest_code[];
-asm("test_step_pgm_lctl_guest_code:\n"
-    "lctl %c0,%c0,1\n"
-    "j .\n");
-
-static void test_step_pgm_lctl(void)
-{
-       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
-}
-
-/* Test single-stepping supervisor-call interruptions. */
-extern char test_step_svc_guest_code[];
-asm("test_step_svc_guest_code:\n"
-    "svc 0\n"
-    "j .\n");
-
-static void test_step_svc(void)
-{
-       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
-}
-
-/* Run all tests above. */
-static struct testdef {
-       const char *name;
-       void (*test)(void);
-} testlist[] = {
-       { "single-step pgm", test_step_pgm },
-       { "single-step pgm caused by diag", test_step_pgm_diag },
-       { "single-step pgm caused by iske", test_step_pgm_iske },
-       { "single-step pgm caused by lctl", test_step_pgm_lctl },
-       { "single-step svc", test_step_svc },
-};
-
-int main(int argc, char *argv[])
-{
-       int idx;
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test();
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-       ksft_finished();
-}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
deleted file mode 100644 (file)
index 4374b4c..0000000
+++ /dev/null
@@ -1,1187 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x KVM_S390_MEM_OP
- *
- * Copyright (C) 2019, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include <linux/bits.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-enum mop_target {
-       LOGICAL,
-       SIDA,
-       ABSOLUTE,
-       INVALID,
-};
-
-enum mop_access_mode {
-       READ,
-       WRITE,
-       CMPXCHG,
-};
-
-struct mop_desc {
-       uintptr_t gaddr;
-       uintptr_t gaddr_v;
-       uint64_t set_flags;
-       unsigned int f_check : 1;
-       unsigned int f_inject : 1;
-       unsigned int f_key : 1;
-       unsigned int _gaddr_v : 1;
-       unsigned int _set_flags : 1;
-       unsigned int _sida_offset : 1;
-       unsigned int _ar : 1;
-       uint32_t size;
-       enum mop_target target;
-       enum mop_access_mode mode;
-       void *buf;
-       uint32_t sida_offset;
-       void *old;
-       uint8_t old_value[16];
-       bool *cmpxchg_success;
-       uint8_t ar;
-       uint8_t key;
-};
-
-const uint8_t NO_KEY = 0xff;
-
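-/* Translate a test-local mop_desc into the kvm_s390_mem_op ioctl argument. */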
-static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
-{
-       struct kvm_s390_mem_op ksmo = {
-               .gaddr = (uintptr_t)desc->gaddr,
-               .size = desc->size,
-               .buf = ((uintptr_t)desc->buf),
-               .reserved = "ignored_ignored_ignored_ignored"
-       };
-
-       switch (desc->target) {
-       case LOGICAL:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-               break;
-       case SIDA:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_SIDA_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
-               break;
-       case ABSOLUTE:
-               if (desc->mode == READ)
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
-               if (desc->mode == WRITE)
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
-               if (desc->mode == CMPXCHG) {
-                       ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
-                       ksmo.old_addr = (uint64_t)desc->old;
-                       memcpy(desc->old_value, desc->old, desc->size);
-               }
-               break;
-       case INVALID:
-               ksmo.op = -1;
-       }
-       if (desc->f_check)
-               ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
-       if (desc->f_inject)
-               ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
-       if (desc->_set_flags)
-               ksmo.flags = desc->set_flags;
-       if (desc->f_key && desc->key != NO_KEY) {
-               ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
-               ksmo.key = desc->key;
-       }
-       if (desc->_ar)
-               ksmo.ar = desc->ar;
-       else
-               ksmo.ar = 0;
-       if (desc->_sida_offset)
-               ksmo.sida_offset = desc->sida_offset;
-
-       return ksmo;
-}
-
-struct test_info {
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-};
-
-#define PRINT_MEMOP false
-static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
-{
-       if (!PRINT_MEMOP)
-               return;
-
-       if (!vcpu)
-               printf("vm memop(");
-       else
-               printf("vcpu memop(");
-       switch (ksmo->op) {
-       case KVM_S390_MEMOP_LOGICAL_READ:
-               printf("LOGICAL, READ, ");
-               break;
-       case KVM_S390_MEMOP_LOGICAL_WRITE:
-               printf("LOGICAL, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_SIDA_READ:
-               printf("SIDA, READ, ");
-               break;
-       case KVM_S390_MEMOP_SIDA_WRITE:
-               printf("SIDA, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_READ:
-               printf("ABSOLUTE, READ, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_WRITE:
-               printf("ABSOLUTE, WRITE, ");
-               break;
-       case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
-               printf("ABSOLUTE, CMPXCHG, ");
-               break;
-       }
-       printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
-              ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
-              ksmo->old_addr);
-       if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
-               printf(", CHECK_ONLY");
-       if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
-               printf(", INJECT_EXCEPTION");
-       if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
-               printf(", SKEY_PROTECTION");
-       puts(")");
-}
-
-static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
-                          struct mop_desc *desc)
-{
-       struct kvm_vcpu *vcpu = info.vcpu;
-
-       if (!vcpu)
-               return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
-       else
-               return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
-}
-
-static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
-                       struct mop_desc *desc)
-{
-       int r;
-
-       r = err_memop_ioctl(info, ksmo, desc);
-       if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
-               if (desc->cmpxchg_success) {
-                       int diff = memcmp(desc->old_value, desc->old, desc->size);
-                       *desc->cmpxchg_success = !diff;
-               }
-       }
-       TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
-}
-
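-/*
- * Build a mop_desc from the target, access mode, buffer and size plus any
- * optional designated initializers (GADDR, KEY, AR, ...), translate a guest
- * virtual address to absolute when required, and issue KVM_S390_MEM_OP via
- * the asserting (MOP) or error-returning (ERR_MOP) wrapper.
- */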
-#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...)    \
-({                                                                             \
-       struct test_info __info = (info_p);                                     \
-       struct mop_desc __desc = {                                              \
-               .target = (mop_target_p),                                       \
-               .mode = (access_mode_p),                                        \
-               .buf = (buf_p),                                                 \
-               .size = (size_p),                                               \
-               __VA_ARGS__                                                     \
-       };                                                                      \
-       struct kvm_s390_mem_op __ksmo;                                          \
-                                                                               \
-       if (__desc._gaddr_v) {                                                  \
-               if (__desc.target == ABSOLUTE)                                  \
-                       __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
-               else                                                            \
-                       __desc.gaddr = __desc.gaddr_v;                          \
-       }                                                                       \
-       __ksmo = ksmo_from_desc(&__desc);                                       \
-       print_memop(__info.vcpu, &__ksmo);                                      \
-       err##memop_ioctl(__info, &__ksmo, &__desc);                             \
-})
-
-#define MOP(...) MEMOP(, __VA_ARGS__)
-#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
-
-#define GADDR(a) .gaddr = ((uintptr_t)a)
-#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
-#define CHECK_ONLY .f_check = 1
-#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
-#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
-#define AR(a) ._ar = 1, .ar = (a)
-#define KEY(a) .f_key = 1, .key = (a)
-#define INJECT .f_inject = 1
-#define CMPXCHG_OLD(o) .old = (o)
-#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
-
-#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
-
-#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
-
-static uint8_t __aligned(PAGE_SIZE) mem1[65536];
-static uint8_t __aligned(PAGE_SIZE) mem2[65536];
-
-struct test_default {
-       struct kvm_vm *kvm_vm;
-       struct test_info vm;
-       struct test_info vcpu;
-       struct kvm_run *run;
-       int size;
-};
-
-static struct test_default test_default_init(void *guest_code)
-{
-       struct kvm_vcpu *vcpu;
-       struct test_default t;
-
-       t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
-       t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       t.vm = (struct test_info) { t.kvm_vm, NULL };
-       t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
-       t.run = vcpu->run;
-       return t;
-}
-
-enum stage {
-       /* Synced state set by host, e.g. DAT */
-       STAGE_INITED,
-       /* Guest did nothing */
-       STAGE_IDLED,
-       /* Guest set storage keys (specifics up to test case) */
-       STAGE_SKEYS_SET,
-       /* Guest copied memory (locations up to test case) */
-       STAGE_COPIED,
-       /* End of guest code reached */
-       STAGE_DONE,
-};
-
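-/*
- * Run the vCPU until its next ucall and assert that it reported GUEST_SYNC
- * with the expected stage; a guest assert is reported as a test failure.
- */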
-#define HOST_SYNC(info_p, stage)                                       \
-({                                                                     \
-       struct test_info __info = (info_p);                             \
-       struct kvm_vcpu *__vcpu = __info.vcpu;                          \
-       struct ucall uc;                                                \
-       int __stage = (stage);                                          \
-                                                                       \
-       vcpu_run(__vcpu);                                               \
-       get_ucall(__vcpu, &uc);                                         \
-       if (uc.cmd == UCALL_ABORT) {                                    \
-               REPORT_GUEST_ASSERT(uc);                                \
-       }                                                               \
-       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
-       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
-})                                                                     \
-
-static void prepare_mem12(void)
-{
-       int i;
-
-       for (i = 0; i < sizeof(mem1); i++)
-               mem1[i] = rand();
-       memset(mem2, 0xaa, sizeof(mem2));
-}
-
-#define ASSERT_MEM_EQ(p1, p2, size) \
-       TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
-
-static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
-                              enum mop_target mop_target, uint32_t size, uint8_t key)
-{
-       prepare_mem12();
-       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
-                  GADDR_V(mem1), KEY(key));
-       HOST_SYNC(copy_cpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
-                  GADDR_V(mem2), KEY(key));
-       ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
-                        enum mop_target mop_target, uint32_t size, uint8_t key)
-{
-       prepare_mem12();
-       CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
-       HOST_SYNC(copy_cpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
-                  GADDR_V(mem2), KEY(key));
-       ASSERT_MEM_EQ(mem1, mem2, size);
-}
-
-static void default_cmpxchg(struct test_default *test, uint8_t key)
-{
-       for (int size = 1; size <= 16; size *= 2) {
-               for (int offset = 0; offset < 16; offset += size) {
-                       uint8_t __aligned(16) new[16] = {};
-                       uint8_t __aligned(16) old[16];
-                       bool succ;
-
-                       prepare_mem12();
-                       default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
-
-                       memcpy(&old, mem1, 16);
-                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
-                           size, GADDR_V(mem1 + offset),
-                           CMPXCHG_OLD(old + offset),
-                           CMPXCHG_SUCCESS(&succ), KEY(key));
-                       HOST_SYNC(test->vcpu, STAGE_COPIED);
-                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
-                       TEST_ASSERT(succ, "exchange of values should succeed");
-                       memcpy(mem1 + offset, new + offset, size);
-                       ASSERT_MEM_EQ(mem1, mem2, 16);
-
-                       memcpy(&old, mem1, 16);
-                       new[offset]++;
-                       old[offset]++;
-                       MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
-                           size, GADDR_V(mem1 + offset),
-                           CMPXCHG_OLD(old + offset),
-                           CMPXCHG_SUCCESS(&succ), KEY(key));
-                       HOST_SYNC(test->vcpu, STAGE_COPIED);
-                       MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
-                       TEST_ASSERT(!succ, "exchange of values should not succeed");
-                       ASSERT_MEM_EQ(mem1, mem2, 16);
-                       ASSERT_MEM_EQ(&old, mem1, 16);
-               }
-       }
-}
-
-static void guest_copy(void)
-{
-       GUEST_SYNC(STAGE_INITED);
-       memcpy(&mem2, &mem1, sizeof(mem2));
-       GUEST_SYNC(STAGE_COPIED);
-}
-
-static void test_copy(void)
-{
-       struct test_default t = test_default_init(guest_copy);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_access_register(void)
-{
-       struct test_default t = test_default_init(guest_copy);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       prepare_mem12();
-       t.run->psw_mask &= ~(3UL << (63 - 17));
-       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-
-       /*
-        * Primary address space gets used if an access register
-        * contains zero. The host makes use of AR[1], so it is a good
-        * candidate for ensuring the guest AR (of zero) is used.
-        */
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
-                  GADDR_V(mem1), AR(1));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
-                  GADDR_V(mem2), AR(1));
-       ASSERT_MEM_EQ(mem1, mem2, t.size);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
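-/*
- * Guest helper: set the storage key of every page in [addr, addr + len) via
- * SSKE, using LRA to obtain the absolute address and asserting that each
- * page is mapped.
- */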
-static void set_storage_key_range(void *addr, size_t len, uint8_t key)
-{
-       uintptr_t _addr, abs, i;
-       int not_mapped = 0;
-
-       _addr = (uintptr_t)addr;
-       for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
-               abs = i;
-               asm volatile (
-                              "lra     %[abs], 0(0,%[abs])\n"
-                       "       jz      0f\n"
-                       "       llill   %[not_mapped],1\n"
-                       "       j       1f\n"
-                       "0:     sske    %[key], %[abs]\n"
-                       "1:"
-                       : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
-                       : [key] "r" (key)
-                       : "cc"
-               );
-               GUEST_ASSERT_EQ(not_mapped, 0);
-       }
-}
-
-static void guest_copy_key(void)
-{
-       set_storage_key_range(mem1, sizeof(mem1), 0x90);
-       set_storage_key_range(mem2, sizeof(mem2), 0x90);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               memcpy(&mem2, &mem1, sizeof(mem2));
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key);
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm, no key */
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
-
-       /* vm/vcpu, matching key or key 0 */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
-       /*
-        * There used to be different code paths for key handling depending on
-        * whether the region crossed a page boundary.
-        * There currently are not, but the more tests the merrier.
-        */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
-       default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
-
-       /* vm/vcpu, mismatching keys on read, but no fetch protection */
-       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
-       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_cmpxchg_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key);
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       default_cmpxchg(&t, NO_KEY);
-       default_cmpxchg(&t, 0);
-       default_cmpxchg(&t, 9);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static __uint128_t cut_to_size(int size, __uint128_t val)
-{
-       switch (size) {
-       case 1:
-               return (uint8_t)val;
-       case 2:
-               return (uint16_t)val;
-       case 4:
-               return (uint32_t)val;
-       case 8:
-               return (uint64_t)val;
-       case 16:
-               return val;
-       }
-       GUEST_FAIL("Invalid size = %u", size);
-       return 0;
-}
-
-static bool popcount_eq(__uint128_t a, __uint128_t b)
-{
-       unsigned int count_a, count_b;
-
-       count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
-                 __builtin_popcountl((uint64_t)a);
-       count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
-                 __builtin_popcountl((uint64_t)b);
-       return count_a == count_b;
-}
-
-static __uint128_t rotate(int size, __uint128_t val, int amount)
-{
-       unsigned int bits = size * 8;
-
-       amount = (amount + bits) % bits;
-       val = cut_to_size(size, val);
-       if (!amount)
-               return val;
-       return (val << (bits - amount)) | (val >> amount);
-}
-
-const unsigned int max_block = 16;
-
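-/*
- * Derive a pseudo-random, size-aligned block (power-of-two size and offset
- * within max_block) from iteration index i; guest and host use different
- * multipliers so they pick different sequences.
- */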
-static void choose_block(bool guest, int i, int *size, int *offset)
-{
-       unsigned int rand;
-
-       rand = i;
-       if (guest) {
-               rand = rand * 19 + 11;
-               *size = 1 << ((rand % 3) + 2);
-               rand = rand * 19 + 11;
-               *offset = (rand % max_block) & ~(*size - 1);
-       } else {
-               rand = rand * 17 + 5;
-               *size = 1 << (rand % 5);
-               rand = rand * 17 + 5;
-               *offset = (rand % max_block) & ~(*size - 1);
-       }
-}
-
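-/*
- * Pseudo-randomly permute the bits of old: either swap two bytes of the
- * 16-byte value or rotate the low size bytes, preserving the number of set
- * bits for the final popcount check.
- */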
-static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
-{
-       unsigned int rand;
-       int amount;
-       bool swap;
-
-       rand = i;
-       rand = rand * 3 + 1;
-       if (guest)
-               rand = rand * 3 + 1;
-       swap = rand % 2 == 0;
-       if (swap) {
-               int i, j;
-               __uint128_t new;
-               uint8_t byte0, byte1;
-
-               rand = rand * 3 + 1;
-               i = rand % size;
-               rand = rand * 3 + 1;
-               j = rand % size;
-               if (i == j)
-                       return old;
-               new = rotate(16, old, i * 8);
-               byte0 = new & 0xff;
-               new &= ~0xff;
-               new = rotate(16, new, -i * 8);
-               new = rotate(16, new, j * 8);
-               byte1 = new & 0xff;
-               new = (new & ~0xff) | byte0;
-               new = rotate(16, new, -j * 8);
-               new = rotate(16, new, i * 8);
-               new = new | byte1;
-               new = rotate(16, new, -i * 8);
-               return new;
-       }
-       rand = rand * 3 + 1;
-       amount = rand % (size * 8);
-       return rotate(size, old, amount);
-}
-
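-/*
- * Guest-side compare-and-swap of size bytes at target using CS/CSG/CDSG.
- * Returns true if the exchange succeeded; on failure *old_addr is updated
- * with the value actually found in memory.
- */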
-static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
-{
-       bool ret;
-
-       switch (size) {
-       case 4: {
-                       uint32_t old = *old_addr;
-
-                       asm volatile ("cs %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(uint32_t *)(target))
-                           : [new] "d" ((uint32_t)new)
-                           : "cc"
-                       );
-                       ret = old == (uint32_t)*old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       case 8: {
-                       uint64_t old = *old_addr;
-
-                       asm volatile ("csg %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(uint64_t *)(target))
-                           : [new] "d" ((uint64_t)new)
-                           : "cc"
-                       );
-                       ret = old == (uint64_t)*old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       case 16: {
-                       __uint128_t old = *old_addr;
-
-                       asm volatile ("cdsg %[old],%[new],%[address]"
-                           : [old] "+d" (old),
-                             [address] "+Q" (*(__uint128_t *)(target))
-                           : [new] "d" (new)
-                           : "cc"
-                       );
-                       ret = old == *old_addr;
-                       *old_addr = old;
-                       return ret;
-               }
-       }
-       GUEST_FAIL("Invalid size = %u", size);
-       return 0;
-}
-
-const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
-
-static void guest_cmpxchg_key(void)
-{
-       int size, offset;
-       __uint128_t old, new;
-
-       set_storage_key_range(mem1, max_block, 0x10);
-       set_storage_key_range(mem2, max_block, 0x10);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (int i = 0; i < cmpxchg_iter_outer; i++) {
-               do {
-                       old = 1;
-               } while (!_cmpxchg(16, mem1, &old, 0));
-               for (int j = 0; j < cmpxchg_iter_inner; j++) {
-                       choose_block(true, i + j, &size, &offset);
-                       do {
-                               new = permutate_bits(true, i + j, size, old);
-                       } while (!_cmpxchg(size, mem2 + offset, &old, new));
-               }
-       }
-
-       GUEST_SYNC(STAGE_DONE);
-}
-
-static void *run_guest(void *data)
-{
-       struct test_info *info = data;
-
-       HOST_SYNC(*info, STAGE_DONE);
-       return NULL;
-}
-
-static char *quad_to_char(__uint128_t *quad, int size)
-{
-       return ((char *)quad) + (sizeof(*quad) - size);
-}
-
-static void test_cmpxchg_key_concurrent(void)
-{
-       struct test_default t = test_default_init(guest_cmpxchg_key);
-       int size, offset;
-       __uint128_t old, new;
-       bool success;
-       pthread_t thread;
-
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
-       pthread_create(&thread, NULL, run_guest, &t.vcpu);
-
-       for (int i = 0; i < cmpxchg_iter_outer; i++) {
-               do {
-                       old = 0;
-                       new = 1;
-                       MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
-                           sizeof(new), GADDR_V(mem1),
-                           CMPXCHG_OLD(&old),
-                           CMPXCHG_SUCCESS(&success), KEY(1));
-               } while (!success);
-               for (int j = 0; j < cmpxchg_iter_inner; j++) {
-                       choose_block(false, i + j, &size, &offset);
-                       do {
-                               new = permutate_bits(false, i + j, size, old);
-                               MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
-                                   size, GADDR_V(mem2 + offset),
-                                   CMPXCHG_OLD(quad_to_char(&old, size)),
-                                   CMPXCHG_SUCCESS(&success), KEY(1));
-                       } while (!success);
-               }
-       }
-
-       pthread_join(thread, NULL);
-
-       MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
-       TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
-                   "Must retain number of set bits");
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_copy_key_fetch_prot(void)
-{
-       /*
-        * For some reason combining the first sync with override enablement
-        * results in an exception when calling HOST_SYNC.
-        */
-       GUEST_SYNC(STAGE_INITED);
-       /* Storage protection override applies to both store and fetch. */
-       set_storage_key_range(mem1, sizeof(mem1), 0x98);
-       set_storage_key_range(mem2, sizeof(mem2), 0x98);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               memcpy(&mem2, &mem1, sizeof(mem2));
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key_storage_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys, storage protection override in effect */
-       default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_copy_key_fetch_prot(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm/vcpu, matching key, fetch protection in effect */
-       default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
-       default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-#define ERR_PROT_MOP(...)                                                      \
-({                                                                             \
-       int rv;                                                                 \
-                                                                               \
-       rv = ERR_MOP(__VA_ARGS__);                                              \
-       TEST_ASSERT(rv == 4, "Should result in protection exception");          \
-})
-
-static void guest_error_key(void)
-{
-       GUEST_SYNC(STAGE_INITED);
-       set_storage_key_range(mem1, PAGE_SIZE, 0x18);
-       set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
-       GUEST_SYNC(STAGE_SKEYS_SET);
-       GUEST_SYNC(STAGE_IDLED);
-}
-
-static void test_errors_key(void)
-{
-       struct test_default t = test_default_init(guest_error_key);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm/vcpu, mismatching keys, fetch protection in effect */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg_key(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-       int i;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       for (i = 1; i <= 16; i *= 2) {
-               __uint128_t old = 0;
-
-               ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
-                            CMPXCHG_OLD(&old), KEY(2));
-       }
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_termination(void)
-{
-       struct test_default t = test_default_init(guest_error_key);
-       uint64_t prefix;
-       uint64_t teid;
-       uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
-       uint64_t psw[2];
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys after first page */
-       ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
-       /*
-        * The memop injected a program exception and the test needs to check the
-        * Translation-Exception Identification (TEID). It is necessary to run
-        * the guest in order to be able to read the TEID from guest memory.
-        * Set the guest program new PSW, so the guest state is not clobbered.
-        */
-       prefix = t.run->s.regs.prefix;
-       psw[0] = t.run->psw_mask;
-       psw[1] = t.run->psw_addr;
-       MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
-       HOST_SYNC(t.vcpu, STAGE_IDLED);
-       MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
-       /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
-       TEST_ASSERT_EQ(teid & teid_mask, 0);
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_storage_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot);
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vm, mismatching keys, storage protection override not applicable to vm */
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-const uint64_t last_page_addr = -PAGE_SIZE;
-
-static void guest_copy_key_fetch_prot_override(void)
-{
-       int i;
-       char *page_0 = 0;
-
-       GUEST_SYNC(STAGE_INITED);
-       set_storage_key_range(0, PAGE_SIZE, 0x18);
-       set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
-       asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
-       GUEST_SYNC(STAGE_SKEYS_SET);
-
-       for (;;) {
-               for (i = 0; i < PAGE_SIZE; i++)
-                       page_0[i] = mem1[i];
-               GUEST_SYNC(STAGE_COPIED);
-       }
-}
-
-static void test_copy_key_fetch_prot_override(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys on fetch, fetch protection override applies */
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
-       ASSERT_MEM_EQ(mem1, mem2, 2048);
-
-       /*
-        * vcpu, mismatching keys on fetch, fetch protection override applies,
-        * wraparound
-        */
-       prepare_mem12();
-       MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
-       HOST_SYNC(t.vcpu, STAGE_COPIED);
-       CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
-                  GADDR_V(guest_last_page), KEY(2));
-       ASSERT_MEM_EQ(mem1, mem2, 2048);
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_not_enabled(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_key_fetch_prot_override_enabled(void)
-{
-       struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
-       vm_vaddr_t guest_0_page, guest_last_page;
-
-       guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
-       guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
-       if (guest_0_page != 0 || guest_last_page != last_page_addr) {
-               print_skip("did not allocate guest pages at required positions");
-               goto out;
-       }
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-       t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       t.run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
-
-       /*
-        * vcpu, mismatching keys on fetch; fetch protection override does not
-        * apply because the access exceeds the 2048-byte override range
-        */
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
-                  GADDR_V(guest_last_page), KEY(2));
-       /* vm, fetch protected override does not apply */
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
-       CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
-
-out:
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void guest_idle(void)
-{
-       GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
-       for (;;)
-               GUEST_SYNC(STAGE_IDLED);
-}
-
-static void _test_errors_common(struct test_info info, enum mop_target target, int size)
-{
-       int rv;
-
-       /* Bad size: */
-       rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
-
-       /* Zero size: */
-       rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
-                   "ioctl allows 0 as size");
-
-       /* Bad flags: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
-
-       /* Bad guest address: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
-       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
-       TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
-
-       /* Bad host address: */
-       rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == EFAULT,
-                   "ioctl does not report bad host memory address");
-
-       /* Bad key: */
-       rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
-}
-
-static void test_errors(void)
-{
-       struct test_default t = test_default_init(guest_idle);
-       int rv;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       _test_errors_common(t.vcpu, LOGICAL, t.size);
-       _test_errors_common(t.vm, ABSOLUTE, t.size);
-
-       /* Bad operation: */
-       rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-       /* virtual addresses are not translated when passing INVALID */
-       rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
-       /* Bad access register: */
-       t.run->psw_mask &= ~(3UL << (63 - 17));
-       t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-       HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
-       rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
-       TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
-       t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
-       HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
-
-       /* Check that the SIDA calls are rejected for non-protected guests */
-       rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL,
-                   "ioctl does not reject SIDA_READ in non-protected mode");
-       rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
-       TEST_ASSERT(rv == -1 && errno == EINVAL,
-                   "ioctl does not reject SIDA_WRITE in non-protected mode");
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-static void test_errors_cmpxchg(void)
-{
-       struct test_default t = test_default_init(guest_idle);
-       __uint128_t old;
-       int rv, i, power = 1;
-
-       HOST_SYNC(t.vcpu, STAGE_INITED);
-
-       for (i = 0; i < 32; i++) {
-               if (i == power) {
-                       power *= 2;
-                       continue;
-               }
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv == -1 && errno == EINVAL,
-                           "ioctl allows bad size for cmpxchg");
-       }
-       for (i = 1; i <= 16; i *= 2) {
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
-       }
-       for (i = 2; i <= 16; i *= 2) {
-               rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
-                            CMPXCHG_OLD(&old));
-               TEST_ASSERT(rv == -1 && errno == EINVAL,
-                           "ioctl allows bad alignment for cmpxchg");
-       }
-
-       kvm_vm_free(t.kvm_vm);
-}
-
-int main(int argc, char *argv[])
-{
-       int extension_cap, idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
-       extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
-
-       struct testdef {
-               const char *name;
-               void (*test)(void);
-               bool requirements_met;
-       } testlist[] = {
-               {
-                       .name = "simple copy",
-                       .test = test_copy,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "generic error checks",
-                       .test = test_errors,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "copy with storage keys",
-                       .test = test_copy_key,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "cmpxchg with storage keys",
-                       .test = test_cmpxchg_key,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "concurrently cmpxchg with storage keys",
-                       .test = test_cmpxchg_key_concurrent,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "copy with key storage protection override",
-                       .test = test_copy_key_storage_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with key fetch protection",
-                       .test = test_copy_key_fetch_prot,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with key fetch protection override",
-                       .test = test_copy_key_fetch_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "copy with access register mode",
-                       .test = test_copy_access_register,
-                       .requirements_met = true,
-               },
-               {
-                       .name = "error checks with key",
-                       .test = test_errors_key,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks for cmpxchg with key",
-                       .test = test_errors_cmpxchg_key,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "error checks for cmpxchg",
-                       .test = test_errors_cmpxchg,
-                       .requirements_met = extension_cap & 0x2,
-               },
-               {
-                       .name = "termination",
-                       .test = test_termination,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks with key storage protection override",
-                       .test = test_errors_key_storage_prot_override,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks without key fetch prot override",
-                       .test = test_errors_key_fetch_prot_override_not_enabled,
-                       .requirements_met = extension_cap > 0,
-               },
-               {
-                       .name = "error checks with key fetch prot override",
-                       .test = test_errors_key_fetch_prot_override_enabled,
-                       .requirements_met = extension_cap > 0,
-               },
-       };
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (testlist[idx].requirements_met) {
-                       testlist[idx].test();
-                       ksft_test_result_pass("%s\n", testlist[idx].name);
-               } else {
-                       ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
-                                             testlist[idx].name, extension_cap);
-               }
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
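The MOP()/ERR_MOP()/CHECK_N_DO() wrappers used in the tests above all reduce to filling a struct kvm_s390_mem_op and calling the KVM_S390_MEM_OP ioctl on the vCPU fd (logical accesses) or the VM fd (absolute accesses). A minimal sketch of a keyed logical write against an already open vcpu_fd follows; this is illustrative only, not the selftest's own helper. Per the KVM API documentation, the ioctl returns 0 on success, -1 with errno set on usage errors, or a positive program interruption code.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int memop_logical_write_key(int vcpu_fd, uint64_t gaddr, void *buf,
                                   uint32_t size, uint8_t key)
{
        struct kvm_s390_mem_op ksmo = {
                .gaddr = gaddr,
                .buf   = (uintptr_t)buf,
                .size  = size,
                .op    = KVM_S390_MEMOP_LOGICAL_WRITE,
                .flags = KVM_S390_MEMOP_F_SKEY_PROTECTION, /* honor the access key */
                .key   = key,                              /* access key 0-15 */
        };

        return ioctl(vcpu_fd, KVM_S390_MEM_OP, &ksmo);
}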
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
deleted file mode 100644 (file)
index b58f75b..0000000
+++ /dev/null
@@ -1,313 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test for s390x CPU resets
- *
- * Copyright (C) 2020, IBM
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-
-#define LOCAL_IRQS 32
-
-#define ARBITRARY_NON_ZERO_VCPU_ID 3
-
-struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-
-static uint8_t regs_null[512];
-
-static void guest_code_initial(void)
-{
-       /* set several CRs to "safe" value */
-       unsigned long cr2_59 = 0x10;    /* enable guarded storage */
-       unsigned long cr8_63 = 0x1;     /* monitor mask = 1 */
-       unsigned long cr10 = 1;         /* PER START */
-       unsigned long cr11 = -1;        /* PER END */
-
-
-       /* Dirty registers */
-       asm volatile (
-               "       lghi    2,0x11\n"       /* Round toward 0 */
-               "       sfpc    2\n"            /* set fpc to !=0 */
-               "       lctlg   2,2,%0\n"
-               "       lctlg   8,8,%1\n"
-               "       lctlg   10,10,%2\n"
-               "       lctlg   11,11,%3\n"
-               /* now clobber some general purpose regs */
-               "       llihh   0,0xffff\n"
-               "       llihl   1,0x5555\n"
-               "       llilh   2,0xaaaa\n"
-               "       llill   3,0x0000\n"
-               /* now clobber a floating point reg */
-               "       lghi    4,0x1\n"
-               "       cdgbr   0,4\n"
-               /* now clobber an access reg */
-               "       sar     9,4\n"
-               /* We embed diag 501 here to control register content */
-               "       diag 0,0,0x501\n"
-               :
-               : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
-               /* no clobber list as this should not return */
-               );
-}
-
-static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
-{
-       uint64_t eval_reg;
-
-       eval_reg = vcpu_get_reg(vcpu, id);
-       TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
-}
-
-static void assert_noirq(struct kvm_vcpu *vcpu)
-{
-       struct kvm_s390_irq_state irq_state;
-       int irqs;
-
-       irq_state.len = sizeof(buf);
-       irq_state.buf = (unsigned long)buf;
-       irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
-       /*
-        * irqs contains the number of retrieved interrupts. Any interrupt
-        * (notably, the emergency call interrupt we have injected) should
-        * be cleared by the resets, so this should be 0.
-        */
-       TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
-       TEST_ASSERT(!irqs, "IRQ pending");
-}
-
-static void assert_clear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       struct kvm_fpu fpu;
-
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
-
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
-
-       vcpu_fpu_get(vcpu, &fpu);
-       TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
-
-       /* sync regs */
-       TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
-                   "gprs0-15 == 0 (sync_regs)");
-
-       TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
-                   "acrs0-15 == 0 (sync_regs)");
-
-       TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
-                   "vrs0-15 == 0 (sync_regs)");
-}
-
-static void assert_initial_noclear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
-       TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
-                   "gpr0 == 0xffff000000000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
-                   "gpr1 == 0x0000555500000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
-                   "gpr2 == 0x00000000aaaa0000 (sync_regs)");
-       TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
-                   "gpr3 == 0x0000000000000000 (sync_regs)");
-       TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
-                   "fpr0 == 1.0 (sync_regs)");
-       TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
-}
-
-static void assert_initial(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-       struct kvm_sregs sregs;
-       struct kvm_fpu fpu;
-
-       /* KVM_GET_SREGS */
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
-       TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
-                   "cr14 == 0xC2000000 (KVM_GET_SREGS)");
-       TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
-                   "cr1-13 == 0 (KVM_GET_SREGS)");
-       TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
-
-       /* sync regs */
-       TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
-                   "cr14 == 0xC2000000 (sync_regs)");
-       TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
-                   "cr1-13 == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
-       TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
-
-       /* kvm_run */
-       TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
-       TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
-
-       vcpu_fpu_get(vcpu, &fpu);
-       TEST_ASSERT(!fpu.fpc, "fpc == 0");
-
-       test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
-       test_one_reg(vcpu, KVM_REG_S390_PP, 0);
-       test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
-       test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
-       test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
-}
-
-static void assert_normal_noclear(struct kvm_vcpu *vcpu)
-{
-       struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
-
-       TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 0x10 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[8] == 1, "cr8 == 1 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
-       TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
-}
-
-static void assert_normal(struct kvm_vcpu *vcpu)
-{
-       test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
-       TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
-                       "pft == 0xff.....  (sync_regs)");
-       assert_noirq(vcpu);
-}
-
-static void inject_irq(struct kvm_vcpu *vcpu)
-{
-       struct kvm_s390_irq_state irq_state;
-       struct kvm_s390_irq *irq = &buf[0];
-       int irqs;
-
-       /* Inject IRQ */
-       irq_state.len = sizeof(struct kvm_s390_irq);
-       irq_state.buf = (unsigned long)buf;
-       irq->type = KVM_S390_INT_EMERGENCY;
-       irq->u.emerg.code = vcpu->id;
-       irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
-       TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
-}
-
-static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create(1);
-
-       *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
-
-       return vm;
-}
-
-static void test_normal(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing normal reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       /* must not be cleared */
-       assert_normal_noclear(vcpu);
-       assert_initial_noclear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_initial(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing initial reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       assert_initial(vcpu);
-       /* must not be cleared */
-       assert_initial_noclear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_clear(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       ksft_print_msg("Testing clear reset\n");
-       vm = create_vm(&vcpu);
-
-       vcpu_run(vcpu);
-
-       inject_irq(vcpu);
-
-       vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
-
-       /* must be cleared */
-       assert_normal(vcpu);
-       assert_initial(vcpu);
-       assert_clear(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(void);
-       bool needs_cap;
-} testlist[] = {
-       { "initial", test_initial, false },
-       { "normal", test_normal, true },
-       { "clear", test_clear, true },
-};
-
-int main(int argc, char *argv[])
-{
-       bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
-       int idx;
-
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
-                       testlist[idx].test();
-                       ksft_test_result_pass("%s\n", testlist[idx].name);
-               } else {
-                       ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
-                                             testlist[idx].name);
-               }
-       }
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
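The three reset flavours exercised above are plain vCPU ioctls without a payload; KVM_S390_NORMAL_RESET and KVM_S390_CLEAR_RESET additionally require KVM_CAP_S390_VCPU_RESETS, matching the needs_cap flags in the test list. A rough sketch against a raw vCPU fd (the selftest uses the vcpu_ioctl() wrapper instead):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void issue_resets(int vcpu_fd)
{
        /* Each reset clears a superset of the state cleared by the one before it. */
        ioctl(vcpu_fd, KVM_S390_NORMAL_RESET, NULL);
        ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, NULL);
        ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, NULL);
}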
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
deleted file mode 100644 (file)
index bba0d9a..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test shared zeropage handling (with/without storage keys)
- *
- * Copyright (C) 2024, Red Hat, Inc.
- */
-#include <sys/mman.h>
-
-#include <linux/fs.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-
-static void set_storage_key(void *addr, uint8_t skey)
-{
-       asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
-}
-
-static void guest_code(void)
-{
-       /* Issue some storage key instruction. */
-       set_storage_key((void *)0, 0x98);
-       GUEST_DONE();
-}
-
-/*
- * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
- * Returns < 0 on error or if nothing is mapped.
- */
-static int maps_shared_zeropage(int pagemap_fd, void *addr)
-{
-       struct page_region region;
-       struct pm_scan_arg arg = {
-               .start = (uintptr_t)addr,
-               .end = (uintptr_t)addr + 4096,
-               .vec = (uintptr_t)&region,
-               .vec_len = 1,
-               .size = sizeof(struct pm_scan_arg),
-               .category_mask = PAGE_IS_PFNZERO,
-               .category_anyof_mask = PAGE_IS_PRESENT,
-               .return_mask = PAGE_IS_PFNZERO,
-       };
-       return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
-}
-
-int main(int argc, char *argv[])
-{
-       char *mem, *page0, *page1, *page2, tmp;
-       const size_t pagesize = getpagesize();
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int pagemap_fd;
-
-       ksft_print_header();
-       ksft_set_plan(3);
-
-       /*
-        * We'll use memory that is not mapped into the VM for simplicity.
-        * Shared zeropages are enabled/disabled per-process.
-        */
-       mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
-       TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
-
-       /* Disable THP. Ignore errors on older kernels. */
-       madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
-
-       page0 = mem;
-       page1 = page0 + pagesize;
-       page2 = page1 + pagesize;
-
-       /* Can we even detect shared zeropages? */
-       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
-       TEST_REQUIRE(pagemap_fd >= 0);
-
-       tmp = *page0;
-       asm volatile("" : "+r" (tmp));
-       TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Verify that we get the shared zeropage after VM creation. */
-       tmp = *page1;
-       asm volatile("" : "+r" (tmp));
-       ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
-                        "Shared zeropages should be enabled\n");
-
-       /*
-        * Let our VM execute a storage key instruction that should
-        * unshare all shared zeropages.
-        */
-       vcpu_run(vcpu);
-       get_ucall(vcpu, &uc);
-       TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
-
-       /* Verify that we don't have a shared zeropage anymore. */
-       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
-                        "Shared zeropage should be gone\n");
-
-       /* Verify that we don't get any new shared zeropages. */
-       tmp = *page2;
-       asm volatile("" : "+r" (tmp));
-       ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
-                        "Shared zeropages should be disabled\n");
-
-       kvm_vm_free(vm);
-
-       ksft_finished();
-}
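The 0x98 passed to set_storage_key() above follows the usual s390 storage-key byte layout: the access-control key in the top four bits, then the fetch-protection, reference and change bits, so 0x98 means key 9 with fetch protection enabled. Because the shared zeropage cannot carry per-guest storage keys, KVM is expected to break the sharing once the guest starts using key instructions, which is what this test verifies. A small decoding sketch, for illustration only:

#include <stdint.h>
#include <stdio.h>

static void decode_storage_key(uint8_t skey)
{
        printf("acc=%u fetch-prot=%u ref=%u change=%u\n",
               skey >> 4,         /* access-control key: 0x98 -> 9 */
               (skey >> 3) & 1,   /* fetch-protection bit: 0x98 -> 1 */
               (skey >> 2) & 1,   /* reference bit */
               (skey >> 1) & 1);  /* change bit */
}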
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
deleted file mode 100644 (file)
index 53def35..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for s390x KVM_CAP_SYNC_REGS
- *
- * Based on the same test for x86:
- * Copyright (C) 2018, Google LLC.
- *
- * Adaptions for s390x:
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "diag318_test_handler.h"
-#include "kselftest.h"
-
-static void guest_code(void)
-{
-       /*
-        * We embed diag 501 here instead of doing a ucall to avoid the
-        * compiler clobbering r11 at the time of the ucall.
-        */
-       asm volatile (
-               "0:     diag 0,0,0x501\n"
-               "       ahi 11,1\n"
-               "       j 0b\n"
-       );
-}
-
-#define REG_COMPARE(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx", \
-                   left->reg, right->reg)
-
-#define REG_COMPARE32(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%x, 0x%x", \
-                   left->reg, right->reg)
-
-
-static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
-{
-       int i;
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE(gprs[i]);
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
-{
-       int i;
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE32(acrs[i]);
-
-       for (i = 0; i < 16; i++)
-               REG_COMPARE(crs[i]);
-}
-
-#undef REG_COMPARE
-
-#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
-#define INVALID_SYNC_FIELD 0x80000000
-
-void test_read_invalid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-
-       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-}
-
-void test_set_invalid(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-}
-
-void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       int rv;
-
-       /* Request and verify all valid register sets. */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
-                   (run->s390_sieic.ipa >> 8) == 0x83 &&
-                   (run->s390_sieic.ipb >> 16) == 0x501,
-                   "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
-                   run->s390_sieic.icptcode, run->s390_sieic.ipa,
-                   run->s390_sieic.ipb);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-       int rv;
-
-       /* Set and verify various register values */
-       run->s.regs.gprs[11] = 0xBAD1DEA;
-       run->s.regs.acrs[0] = 1 << 11;
-
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
-
-       if (get_diag318_info() > 0) {
-               run->s.regs.diag318 = get_diag318_info();
-               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
-       }
-
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
-                   "r11 sync regs value incorrect 0x%llx.",
-                   run->s.regs.gprs[11]);
-       TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
-                   "acr0 sync regs value incorrect 0x%x.",
-                   run->s.regs.acrs[0]);
-       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
-                   "diag318 sync regs value incorrect 0x%llx.",
-                   run->s.regs.diag318);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs);
-}
-
-void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Clear kvm_dirty_regs bits, verify new s.regs values are
-        * overwritten with existing guest values.
-        */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.gprs[11] = 0xDEADBEEF;
-       run->s.regs.diag318 = 0x4B1D;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
-       TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
-                   "r11 sync regs value incorrect 0x%llx.",
-                   run->s.regs.gprs[11]);
-       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
-                   "diag318 sync regs value incorrect 0x%llx.",
-                   run->s.regs.diag318);
-}
-
-struct testdef {
-       const char *name;
-       void (*test)(struct kvm_vcpu *vcpu);
-} testlist[] = {
-       { "read invalid", test_read_invalid },
-       { "set invalid", test_set_invalid },
-       { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
-       { "set+verify various regs", test_set_and_verify_various_reg_values },
-       { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
-};
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int idx;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
-
-       ksft_print_header();
-
-       ksft_set_plan(ARRAY_SIZE(testlist));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
-               testlist[idx].test(vcpu);
-               ksft_test_result_pass("%s\n", testlist[idx].name);
-       }
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
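The kvm_valid_regs / kvm_dirty_regs handshake these tests exercise works directly on the mmap()ed kvm_run area: valid bits ask KVM to fill run->s.regs on the next exit, dirty bits tell KVM to load values from run->s.regs before entering the guest. A bare-bones sketch on a raw vCPU fd with illustrative values (the selftest goes through _vcpu_run() and the register-compare helpers instead):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void sync_regs_roundtrip(int vcpu_fd, struct kvm_run *run)
{
        /* Request GPRs, ACRs and CRs in run->s.regs on the next exit. */
        run->kvm_valid_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS;

        /* Hand a modified GPR back to KVM before the next guest entry. */
        run->s.regs.gprs[11] = 0xBAD1DEA;
        run->kvm_dirty_regs = KVM_SYNC_GPRS;

        ioctl(vcpu_fd, KVM_RUN, NULL);
        /* On return, run->s.regs reflects the current guest register state. */
}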
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
deleted file mode 100644 (file)
index 12d5e1c..0000000
+++ /dev/null
@@ -1,244 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Test TEST PROTECTION emulation.
- *
- * Copyright IBM Corp. 2021
- */
-#include <sys/mman.h>
-#include "test_util.h"
-#include "kvm_util.h"
-#include "kselftest.h"
-#include "ucall_common.h"
-#include "processor.h"
-
-#define CR0_FETCH_PROTECTION_OVERRIDE  (1UL << (63 - 38))
-#define CR0_STORAGE_PROTECTION_OVERRIDE        (1UL << (63 - 39))
-
-static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
-static uint8_t *const page_store_prot = pages[0];
-static uint8_t *const page_fetch_prot = pages[1];
-
-/* A nonzero return value indicates that the address is not mapped */
-static int set_storage_key(void *addr, uint8_t key)
-{
-       int not_mapped = 0;
-
-       asm volatile (
-                      "lra     %[addr], 0(0,%[addr])\n"
-               "       jz      0f\n"
-               "       llill   %[not_mapped],1\n"
-               "       j       1f\n"
-               "0:     sske    %[key], %[addr]\n"
-               "1:"
-               : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
-               : [key] "r" (key)
-               : "cc"
-       );
-       return -not_mapped;
-}
-
-enum permission {
-       READ_WRITE = 0,
-       READ = 1,
-       RW_PROTECTED = 2,
-       TRANSL_UNAVAIL = 3,
-};
-
-static enum permission test_protection(void *addr, uint8_t key)
-{
-       uint64_t mask;
-
-       asm volatile (
-                      "tprot   %[addr], 0(%[key])\n"
-               "       ipm     %[mask]\n"
-               : [mask] "=r" (mask)
-               : [addr] "Q" (*(char *)addr),
-                 [key] "a" (key)
-               : "cc"
-       );
-
-       return (enum permission)(mask >> 28);
-}
-
-enum stage {
-       STAGE_INIT_SIMPLE,
-       TEST_SIMPLE,
-       STAGE_INIT_FETCH_PROT_OVERRIDE,
-       TEST_FETCH_PROT_OVERRIDE,
-       TEST_STORAGE_PROT_OVERRIDE,
-       STAGE_END       /* must be the last entry (it's the amount of tests) */
-};
-
-struct test {
-       enum stage stage;
-       void *addr;
-       uint8_t key;
-       enum permission expected;
-} tests[] = {
-       /*
-        * We perform each test in the array by executing TEST PROTECTION on
-        * the specified addr with the specified key and checking if the returned
-        * permissions match the expected value.
-        * Both guest and host cooperate to set up the required test conditions.
-        * A central condition is that the page targeted by addr has to be DAT
-        * protected in the host mappings, in order for KVM to emulate the
-        * TEST PROTECTION instruction.
-        * Since the page tables are shared, the host uses mprotect to achieve
-        * this.
-        *
-        * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
-        * by SIE, not KVM, but there is no harm in testing them also.
-        * See Enhanced Suppression-on-Protection Facilities in the
-       /* vm, fetch protection override does not apply */
-        */
-       /*
-        * guest: set storage key of page_store_prot to 1
-        *        storage key of page_fetch_prot to 9 and enable
-        *        protection for it
-        * STAGE_INIT_SIMPLE
-        * host: write protect both via mprotect
-        */
-       /* access key 0 matches any storage key -> RW */
-       { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
-       /* access key matches storage key -> RW */
-       { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
-       /* mismatched keys, but no fetch protection -> RO */
-       { TEST_SIMPLE, page_store_prot, 0x20, READ },
-       /* access key 0 matches any storage key -> RW */
-       { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
-       /* access key matches storage key -> RW */
-       { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
-       /* mismatched keys, fetch protection -> inaccessible */
-       { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
-       /* page 0 not mapped yet -> translation not available */
-       { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
-       /*
-        * host: try to map page 0
-        * guest: set storage key of page 0 to 9 and enable fetch protection
-        * STAGE_INIT_FETCH_PROT_OVERRIDE
-        * host: write protect page 0
-        *       enable fetch protection override
-        */
-       /* mismatched keys, fetch protection, but override applies -> RO */
-       { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
-       /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
-       { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
-       /*
-        * host: enable storage protection override
-        */
-       /* mismatched keys, but override applies (storage key 9) -> RW */
-       { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
-       /* mismatched keys, no fetch protection, override doesn't apply -> RO */
-       { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
-       /* mismatched keys, but override applies (storage key 9) -> RW */
-       { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
-       /* end marker */
-       { STAGE_END, 0, 0, 0 },
-};
-
-static enum stage perform_next_stage(int *i, bool mapped_0)
-{
-       enum stage stage = tests[*i].stage;
-       enum permission result;
-       bool skip;
-
-       for (; tests[*i].stage == stage; (*i)++) {
-               /*
-                * Some fetch protection override tests require that page 0
-                * be mapped; however, when the host tries to map that page via
-                * vm_vaddr_alloc, it may happen that some other page gets mapped
-                * instead.
-                * In order to skip these tests, we detect this inside the guest.
-                */
-               skip = tests[*i].addr < (void *)PAGE_SIZE &&
-                      tests[*i].expected != TRANSL_UNAVAIL &&
-                      !mapped_0;
-               if (!skip) {
-                       result = test_protection(tests[*i].addr, tests[*i].key);
-                       __GUEST_ASSERT(result == tests[*i].expected,
-                                      "Wanted %u, got %u, for i = %u",
-                                      tests[*i].expected, result, *i);
-               }
-       }
-       return stage;
-}
-
-static void guest_code(void)
-{
-       bool mapped_0;
-       int i = 0;
-
-       GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
-       GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
-       GUEST_SYNC(STAGE_INIT_SIMPLE);
-       GUEST_SYNC(perform_next_stage(&i, false));
-
-       /* Fetch-protection override */
-       mapped_0 = !set_storage_key((void *)0, 0x98);
-       GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
-       GUEST_SYNC(perform_next_stage(&i, mapped_0));
-
-       /* Storage-protection override */
-       GUEST_SYNC(perform_next_stage(&i, mapped_0));
-}
-
-#define HOST_SYNC_NO_TAP(vcpup, stage)                         \
-({                                                             \
-       struct kvm_vcpu *__vcpu = (vcpup);                      \
-       struct ucall uc;                                        \
-       int __stage = (stage);                                  \
-                                                               \
-       vcpu_run(__vcpu);                                       \
-       get_ucall(__vcpu, &uc);                                 \
-       if (uc.cmd == UCALL_ABORT)                              \
-               REPORT_GUEST_ASSERT(uc);                        \
-       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
-       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
-})
-
-#define HOST_SYNC(vcpu, stage)                 \
-({                                             \
-       HOST_SYNC_NO_TAP(vcpu, stage);          \
-       ksft_test_result_pass("" #stage "\n");  \
-})
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       vm_vaddr_t guest_0_page;
-
-       ksft_print_header();
-       ksft_set_plan(STAGE_END);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
-       mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
-       HOST_SYNC(vcpu, TEST_SIMPLE);
-
-       guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
-       if (guest_0_page != 0) {
-               /* Use NO_TAP so we don't get a PASS print */
-               HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
-               ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
-                                     "Did not allocate page at 0\n");
-       } else {
-               HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
-       }
-       if (guest_0_page == 0)
-               mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
-       run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
-       run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
-
-       run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
-       run->kvm_dirty_regs = KVM_SYNC_CRS;
-       HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
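Both the memop tests earlier in this patch and tprot.c rely on the same two CR0-controlled overrides: fetch-protection override only covers effective addresses 0-2047, and storage-protection override only lifts key mismatches for storage whose access-control key is 9. A compact restatement of the expectations encoded in the test tables above (a sketch, not selftest code):

#include <stdbool.h>
#include <stdint.h>

#define CR0_FETCH_PROTECTION_OVERRIDE   (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))

static bool fetch_prot_override_applies(uint64_t cr0, uint64_t eff_addr)
{
        /* Only the first 2048 bytes of the address space are covered. */
        return (cr0 & CR0_FETCH_PROTECTION_OVERRIDE) && eff_addr < 2048;
}

static bool storage_prot_override_applies(uint64_t cr0, uint8_t storage_key)
{
        /* Only storage with access-control key 9 is covered. */
        return (cr0 & CR0_STORAGE_PROTECTION_OVERRIDE) && (storage_key >> 4) == 9;
}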
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
deleted file mode 100644 (file)
index 0c11231..0000000
+++ /dev/null
@@ -1,638 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test code for the s390x kvm ucontrol interface
- *
- * Copyright IBM Corp. 2024
- *
- * Authors:
- *  Christoph Schlameuss <schlameuss@linux.ibm.com>
- */
-#include "debug_print.h"
-#include "kselftest_harness.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sie.h"
-
-#include <linux/capability.h>
-#include <linux/sizes.h>
-
-#define PGM_SEGMENT_TRANSLATION 0x10
-
-#define VM_MEM_SIZE (4 * SZ_1M)
-#define VM_MEM_EXT_SIZE (2 * SZ_1M)
-#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M)
-
-/* Declare capget() directly so we can check capabilities without libcap */
-int capget(cap_user_header_t header, cap_user_data_t data);
-
-/**
- * In order to create user controlled virtual machines on S390,
- * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL
- * as privileged user (SYS_ADMIN).
- */
-void require_ucontrol_admin(void)
-{
-       struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
-       struct __user_cap_header_struct hdr = {
-               .version = _LINUX_CAPABILITY_VERSION_3,
-       };
-       int rc;
-
-       rc = capget(&hdr, data);
-       TEST_ASSERT_EQ(0, rc);
-       TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
-}
-
-/* Test program setting some registers and looping */
-extern char test_gprs_asm[];
-asm("test_gprs_asm:\n"
-       "xgr    %r0, %r0\n"
-       "lgfi   %r1,1\n"
-       "lgfi   %r2,2\n"
-       "lgfi   %r3,3\n"
-       "lgfi   %r4,4\n"
-       "lgfi   %r5,5\n"
-       "lgfi   %r6,6\n"
-       "lgfi   %r7,7\n"
-       "0:\n"
-       "       diag    0,0,0x44\n"
-       "       ahi     %r0,1\n"
-       "       j       0b\n"
-);
-
-/* Test program manipulating memory */
-extern char test_mem_asm[];
-asm("test_mem_asm:\n"
-       "xgr    %r0, %r0\n"
-
-       "0:\n"
-       "       ahi     %r0,1\n"
-       "       st      %r1,0(%r5,%r6)\n"
-
-       "       xgr     %r1,%r1\n"
-       "       l       %r1,0(%r5,%r6)\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       j       0b\n"
-);
-
-/* Test program manipulating storage keys */
-extern char test_skey_asm[];
-asm("test_skey_asm:\n"
-       "xgr    %r0, %r0\n"
-
-       "0:\n"
-       "       ahi     %r0,1\n"
-       "       st      %r1,0(%r5,%r6)\n"
-
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       sske    %r1,%r6\n"
-       "       xgr     %r1,%r1\n"
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       rrbe    %r1,%r6\n"
-       "       iske    %r1,%r6\n"
-       "       ahi     %r0,1\n"
-       "       diag    0,0,0x44\n"
-
-       "       j       0b\n"
-);
-
-FIXTURE(uc_kvm)
-{
-       struct kvm_s390_sie_block *sie_block;
-       struct kvm_run *run;
-       uintptr_t base_gpa;
-       uintptr_t code_gpa;
-       uintptr_t base_hva;
-       uintptr_t code_hva;
-       int kvm_run_size;
-       vm_paddr_t pgd;
-       void *vm_mem;
-       int vcpu_fd;
-       int kvm_fd;
-       int vm_fd;
-};
-
-/**
- * create VM with single vcpu, map kvm_run and SIE control block for easy access
- */
-FIXTURE_SETUP(uc_kvm)
-{
-       struct kvm_s390_vm_cpu_processor info;
-       int rc;
-
-       require_ucontrol_admin();
-
-       self->kvm_fd = open_kvm_dev_path_or_exit();
-       self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
-       ASSERT_GE(self->vm_fd, 0);
-
-       kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL,
-                           KVM_S390_VM_CPU_PROCESSOR, &info);
-       TH_LOG("create VM 0x%llx", info.cpuid);
-
-       self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
-       ASSERT_GE(self->vcpu_fd, 0);
-
-       self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
-       ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
-                 TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
-       self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
-                   PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
-       ASSERT_NE(self->run, MAP_FAILED);
-       /**
-        * For virtual cpus that have been created with S390 user controlled
-        * virtual machines, the resulting vcpu fd can be memory mapped at page
-        * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
-        * the virtual cpu's hardware control block.
-        */
-       self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
-                         PROT_READ | PROT_WRITE, MAP_SHARED,
-                         self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
-       ASSERT_NE(self->sie_block, MAP_FAILED);
-
-       TH_LOG("VM created %p %p", self->run, self->sie_block);
-
-       self->base_gpa = 0;
-       self->code_gpa = self->base_gpa + (3 * SZ_1M);
-
-       self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M);
-       ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno);
-       self->base_hva = (uintptr_t)self->vm_mem;
-       self->code_hva = self->base_hva - self->base_gpa + self->code_gpa;
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = self->base_hva,
-               .vcpu_addr = self->base_gpa,
-               .length = VM_MEM_SIZE,
-       };
-       TH_LOG("ucas map %p %p 0x%llx",
-              (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
-       ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s",
-                               rc, strerror(errno));
-
-       TH_LOG("page in %p", (void *)self->base_gpa);
-       rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa);
-       ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s",
-                               (void *)self->base_hva, rc, strerror(errno));
-
-       self->sie_block->cpuflags &= ~CPUSTAT_STOPPED;
-}
-
-FIXTURE_TEARDOWN(uc_kvm)
-{
-       munmap(self->sie_block, PAGE_SIZE);
-       munmap(self->run, self->kvm_run_size);
-       close(self->vcpu_fd);
-       close(self->vm_fd);
-       close(self->kvm_fd);
-       free(self->vm_mem);
-}
-
-TEST_F(uc_kvm, uc_sie_assertions)
-{
-       /* assert interception of Code 08 (Program Interruption) is set */
-       EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI);
-}
-
-TEST_F(uc_kvm, uc_attr_mem_limit)
-{
-       u64 limit;
-       struct kvm_device_attr attr = {
-               .group = KVM_S390_VM_MEM_CTRL,
-               .attr = KVM_S390_VM_MEM_LIMIT_SIZE,
-               .addr = (unsigned long)&limit,
-       };
-       int rc;
-
-       rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
-       EXPECT_EQ(0, rc);
-       EXPECT_EQ(~0UL, limit);
-
-       /* assert set not supported */
-       rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_no_dirty_log)
-{
-       struct kvm_dirty_log dlog;
-       int rc;
-
-       rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-}
-
-/**
- * Assert HPAGE CAP cannot be enabled on UCONTROL VM
- */
-TEST(uc_cap_hpage)
-{
-       int rc, kvm_fd, vm_fd, vcpu_fd;
-       struct kvm_enable_cap cap = {
-               .cap = KVM_CAP_S390_HPAGE_1M,
-       };
-
-       require_ucontrol_admin();
-
-       kvm_fd = open_kvm_dev_path_or_exit();
-       vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
-       ASSERT_GE(vm_fd, 0);
-
-       /* assert hpages are not supported on ucontrol vm */
-       rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M);
-       EXPECT_EQ(0, rc);
-
-       /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */
-       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EINVAL, errno);
-
-       /* assert HPAGE CAP is rejected after vCPU creation */
-       vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
-       ASSERT_GE(vcpu_fd, 0);
-       rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap);
-       EXPECT_EQ(-1, rc);
-       EXPECT_EQ(EBUSY, errno);
-
-       close(vcpu_fd);
-       close(vm_fd);
-       close(kvm_fd);
-}
-
-/* calculate host virtual addr from guest physical addr */
-static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
-{
-       return (void *)(self->base_hva - self->base_gpa + gpa);
-}
-
-/* map / make additional memory available */
-static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = (u64)gpa2hva(self, vcpu_addr),
-               .vcpu_addr = vcpu_addr,
-               .length = length,
-       };
-       pr_info("ucas map %p %p 0x%llx",
-               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map);
-}
-
-/* unmap previously mapped memory */
-static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length)
-{
-       struct kvm_s390_ucas_mapping map = {
-               .user_addr = (u64)gpa2hva(self, vcpu_addr),
-               .vcpu_addr = vcpu_addr,
-               .length = length,
-       };
-       pr_info("ucas unmap %p %p 0x%llx",
-               (void *)map.user_addr, (void *)map.vcpu_addr, map.length);
-       return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map);
-}
-
-/* handle ucontrol exit by mapping the accessed segment */
-static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_run *run = self->run;
-       u64 seg_addr;
-       int rc;
-
-       TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-       switch (run->s390_ucontrol.pgm_code) {
-       case PGM_SEGMENT_TRANSLATION:
-               seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1);
-               pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n",
-                       run->s390_ucontrol.trans_exc_code, seg_addr);
-               /* map / make additional memory available */
-               rc = uc_map_ext(self, seg_addr, SZ_1M);
-               TEST_ASSERT_EQ(0, rc);
-               break;
-       default:
-               TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code);
-       }
-}
-
-/*
- * Enable storage-key handling for the guest by clearing the keyless-subset
- * (KSS) control and disabling interception of the storage-key instructions
- * ISKE, SSKE and RRBE.
- */
-static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-
-       /* disable KSS */
-       sie_block->cpuflags &= ~CPUSTAT_KSS;
-       /* disable skey inst interception */
-       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
-}
-
-/*
- * Handle the instruction intercept
- * Returns true if the interception was handled and execution can continue
- */
-static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       int ilen = insn_length(sie_block->ipa >> 8);
-       struct kvm_run *run = self->run;
-
-       switch (run->s390_sieic.ipa) {
-       case 0xB229: /* ISKE */
-       case 0xB22b: /* SSKE */
-       case 0xB22a: /* RRBE */
-               uc_skey_enable(self);
-
-               /* rewind to reexecute intercepted instruction */
-               run->psw_addr = run->psw_addr - ilen;
-               pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr);
-               return true;
-       default:
-               return false;
-       }
-}
-
-/*
- * Handle the SIEIC exit:
- * fail on interception codes not expected in the test cases.
- * Returns true if the interception was handled and execution can continue.
- */
-static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       struct kvm_run *run = self->run;
-
-       /* check SIE interception code */
-       pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n",
-               run->s390_sieic.icptcode,
-               run->s390_sieic.ipa,
-               run->s390_sieic.ipb);
-       switch (run->s390_sieic.icptcode) {
-       case ICPT_INST:
-               /* end execution in caller on intercepted instruction */
-               pr_info("sie instruction interception\n");
-               return uc_handle_insn_ic(self);
-       case ICPT_KSS:
-               uc_skey_enable(self);
-               return true;
-       case ICPT_OPEREXC:
-               /* operation exception */
-               TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb);
-       default:
-               TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode);
-       }
-       return true;
-}
-
-/* verify VM state on exit */
-static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_run *run = self->run;
-
-       switch (run->exit_reason) {
-       case KVM_EXIT_S390_UCONTROL:
-               /*
-                * Check the program interruption code and handle a page
-                * fault by mapping the accessed segment (ucas map).
-                */
-               uc_handle_exit_ucontrol(self);
-               break;
-       case KVM_EXIT_S390_SIEIC:
-               return uc_handle_sieic(self);
-       default:
-               pr_info("exit_reason %2d not handled\n", run->exit_reason);
-       }
-       return true;
-}
-
-/* run the VM until interrupted */
-static int uc_run_once(FIXTURE_DATA(uc_kvm) *self)
-{
-       int rc;
-
-       rc = ioctl(self->vcpu_fd, KVM_RUN, NULL);
-       print_run(self->run, self->sie_block);
-       print_regs(self->run);
-       pr_debug("run %d / %d %s\n", rc, errno, strerror(errno));
-       return rc;
-}
-
-static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-
-       /* assert vm was interrupted by diag 0x0044 */
-       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
-       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
-       TEST_ASSERT_EQ(0x8300, sie_block->ipa);
-       TEST_ASSERT_EQ(0x440000, sie_block->ipb);
-}
-
-TEST_F(uc_kvm, uc_no_user_region)
-{
-       struct kvm_userspace_memory_region region = {
-               .slot = 1,
-               .guest_phys_addr = self->code_gpa,
-               .memory_size = VM_MEM_EXT_SIZE,
-               .userspace_addr = (uintptr_t)self->code_hva,
-       };
-       struct kvm_userspace_memory_region2 region2 = {
-               .slot = 1,
-               .guest_phys_addr = self->code_gpa,
-               .memory_size = VM_MEM_EXT_SIZE,
-               .userspace_addr = (uintptr_t)self->code_hva,
-       };
-
-       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
-       ASSERT_EQ(EINVAL, errno);
-
-       ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
-       ASSERT_EQ(EINVAL, errno);
-}
-
-TEST_F(uc_kvm, uc_map_unmap)
-{
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       struct kvm_run *run = self->run;
-       const u64 disp = 1;
-       int rc;
-
-       /* copy test_mem_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE);
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       /* set register content for test_mem_asm to access not mapped memory */
-       sync_regs->gprs[1] = 0x55;
-       sync_regs->gprs[5] = self->base_gpa;
-       sync_regs->gprs[6] = VM_MEM_SIZE + disp;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* run and expect to fail with ucontrol pic segment translation */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-
-       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
-       ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code);
-
-       /* fail to map memory with a non-segment-aligned address */
-       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE);
-       ASSERT_GT(0, rc)
-               TH_LOG("ucas map for a non-segment-aligned address should fail but didn't; "
-                      "result %d not expected, %s", rc, strerror(errno));
-
-       /* map / make additional memory available */
-       rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
-       ASSERT_EQ(0, rc)
-               TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno));
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* assert registers and memory are in expected state */
-       ASSERT_EQ(2, sync_regs->gprs[0]);
-       ASSERT_EQ(0x55, sync_regs->gprs[1]);
-       ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp));
-
-       /* unmap and run loop again */
-       rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE);
-       ASSERT_EQ(0, rc)
-               TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno));
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(3, sync_regs->gprs[0]);
-       ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason);
-       ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code);
-       /* handle ucontrol exit and remap memory after previous map and unmap */
-       ASSERT_EQ(true, uc_handle_exit(self));
-}
-
-TEST_F(uc_kvm, uc_gprs)
-{
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       struct kvm_run *run = self->run;
-       struct kvm_regs regs = {};
-
-       /* Set registers to values that are different from the ones that we expect below */
-       for (int i = 0; i < 8; i++)
-               sync_regs->gprs[i] = 8;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* copy test_gprs_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE);
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       /* run and expect interception of diag 44 */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* Retrieve and check guest register values */
-       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
-       for (int i = 0; i < 8; i++) {
-               ASSERT_EQ(i, regs.gprs[i]);
-               ASSERT_EQ(i, sync_regs->gprs[i]);
-       }
-
-       /* run and expect interception of diag 44 again */
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       uc_assert_diag44(self);
-
-       /* check continued increment of register 0 value */
-       ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs));
-       ASSERT_EQ(1, regs.gprs[0]);
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-}
-
-TEST_F(uc_kvm, uc_skey)
-{
-       struct kvm_s390_sie_block *sie_block = self->sie_block;
-       struct kvm_sync_regs *sync_regs = &self->run->s.regs;
-       u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
-       struct kvm_run *run = self->run;
-       const u8 skeyvalue = 0x34;
-
-       /* copy test_skey_asm to code_hva / code_gpa */
-       TH_LOG("copy code %p to vm mapped memory %p / %p",
-              &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa);
-       memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE);
-
-       /* set register content for test_skey_asm to access not mapped memory */
-       sync_regs->gprs[1] = skeyvalue;
-       sync_regs->gprs[5] = self->base_gpa;
-       sync_regs->gprs[6] = test_vaddr;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-
-       /* DAT disabled + 64 bit mode */
-       run->psw_mask = 0x0000000180000000ULL;
-       run->psw_addr = self->code_gpa;
-
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(true, uc_handle_exit(self));
-       ASSERT_EQ(1, sync_regs->gprs[0]);
-
-       /* ISKE */
-       ASSERT_EQ(0, uc_run_once(self));
-
-       /*
-        * Bail out and skip the test after uc_skey_enable was executed but iske
-        * is still intercepted. Instructions are not handled by the kernel.
-        * Thus there is no need to test this here.
-        */
-       TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS);
-       TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
-       TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
-       TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
-       TEST_REQUIRE(sie_block->ipa != 0xb229);
-
-       /* ISKE contd. */
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(2, sync_regs->gprs[0]);
-       /* assert initial skey (ACC = 0, R & C = 1) */
-       ASSERT_EQ(0x06, sync_regs->gprs[1]);
-       uc_assert_diag44(self);
-
-       /* SSKE + ISKE */
-       sync_regs->gprs[1] = skeyvalue;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(3, sync_regs->gprs[0]);
-       ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
-       uc_assert_diag44(self);
-
-       /* RRBE + ISKE */
-       sync_regs->gprs[1] = skeyvalue;
-       run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-       ASSERT_EQ(0, uc_run_once(self));
-       ASSERT_EQ(false, uc_handle_exit(self));
-       ASSERT_EQ(4, sync_regs->gprs[0]);
-       /* assert R reset but rest of skey unchanged */
-       ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
-       ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
-       uc_assert_diag44(self);
-}
-
-TEST_HARNESS_MAIN
index a8267628e9ed130cffd24ad525dd3cad1c679328..86ee3385e860be47bf37b7bd8ba7d11533751459 100644 (file)
@@ -17,9 +17,9 @@
 #include <processor.h>
 
 /*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
  */
 #define MEM_REGION_SIZE                0x200000
 
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
new file mode 100644 (file)
index 0000000..f4ce5a1
--- /dev/null
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES                      8
+#define TILE_SIZE                      1024
+#define XSAVE_SIZE                     ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define PALETTE_TABLE_INDEX            1
+#define MAX_TILES                      16
+#define RESERVED_BYTES                 14
+
+#define XSAVE_HDR_OFFSET               512
+
+struct tile_config {
+       u8  palette_id;
+       u8  start_row;
+       u8  reserved[RESERVED_BYTES];
+       u16 colsb[MAX_TILES];
+       u8  rows[MAX_TILES];
+};
+
+struct tile_data {
+       u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+       u16 bytes_per_tile;
+       u16 bytes_per_row;
+       u16 max_names;
+       u16 max_rows;
+       u32 xsave_offset;
+       u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline void __ldtilecfg(void *cfg)
+{
+       asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+                    : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+       asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+                    : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+       asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+       uint32_t rfbm_lo = rfbm;
+       uint32_t rfbm_hi = rfbm >> 32;
+
+       asm volatile("xsavec (%%rdi)"
+                    : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+                    : "memory");
+}
+
+static void check_xtile_info(void)
+{
+       GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+       xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+       GUEST_ASSERT(xtile.xsave_offset == 2816);
+       xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+       GUEST_ASSERT(xtile.xsave_size == 8192);
+       GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+                    PALETTE_TABLE_INDEX);
+
+       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+       xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+       GUEST_ASSERT(xtile.max_names == 8);
+       xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+       GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+       xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+       GUEST_ASSERT(xtile.bytes_per_row == 64);
+       xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+       GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+       int i;
+
+       /* Only palette id 1 */
+       cfg->palette_id = 1;
+       for (i = 0; i < xtile.max_names; i++) {
+               cfg->colsb[i] = xtile.bytes_per_row;
+               cfg->rows[i] = xtile.max_rows;
+       }
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+                                                   struct tile_data *tiledata,
+                                                   struct xstate *xstate)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
+                    this_cpu_has(X86_FEATURE_OSXSAVE));
+       check_xtile_info();
+       GUEST_SYNC(1);
+
+       /* xfd=0, enable amx */
+       wrmsr(MSR_IA32_XFD, 0);
+       GUEST_SYNC(2);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+       set_tilecfg(amx_cfg);
+       __ldtilecfg(amx_cfg);
+       GUEST_SYNC(3);
+       /* Check save/restore when trap to userspace */
+       __tileloadd(tiledata);
+       GUEST_SYNC(4);
+       __tilerelease();
+       GUEST_SYNC(5);
+       /*
+        * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+        * the xcomp_bv.
+        */
+       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+       GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+       /* xfd=0x40000, disable amx tiledata */
+       wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+       /*
+        * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+        * remains the same even when amx tiledata is disabled by IA32_XFD.
+        */
+       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+       GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+       GUEST_SYNC(6);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       set_tilecfg(amx_cfg);
+       __ldtilecfg(amx_cfg);
+       /* Trigger #NM exception */
+       __tileloadd(tiledata);
+       GUEST_SYNC(10);
+
+       GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+       /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+       GUEST_SYNC(7);
+       GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_SYNC(8);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+       /* Clear xfd_err */
+       wrmsr(MSR_IA32_XFD_ERR, 0);
+       /* xfd=0, enable amx */
+       wrmsr(MSR_IA32_XFD, 0);
+       GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_regs regs1, regs2;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       int xsave_restore_size;
+       vm_vaddr_t amx_cfg, tiledata, xstate;
+       struct ucall uc;
+       u32 amx_offset;
+       int ret;
+
+       /*
+        * Note, all off-by-default features must be enabled before anything
+        * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+        */
+       vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+                   "KVM should enumerate max XSAVE size when XSAVE is supported");
+       xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       /* Register #NM handler */
+       vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+       /* amx cfg for guest_code */
+       amx_cfg = vm_vaddr_alloc_page(vm);
+       memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+       /* amx tiledata for guest_code */
+       tiledata = vm_vaddr_alloc_pages(vm, 2);
+       memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+       /* XSAVE state for guest_code */
+       xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+       memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+       vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+       for (;;) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       switch (uc.args[1]) {
+                       case 1:
+                       case 2:
+                       case 3:
+                       case 5:
+                       case 6:
+                       case 7:
+                       case 8:
+                               fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+                               break;
+                       case 4:
+                       case 10:
+                               fprintf(stderr,
+                               "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+                               /*
+                                * In compacted format, the tile data is the
+                                * last component, so the AMX offset is the
+                                * XSAVE area size minus the 8K of tile data.
+                                */
+                               amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+                               state = vcpu_save_state(vcpu);
+                               void *amx_start = (void *)state->xsave + amx_offset;
+                               void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+                               /* Only check TMM0 register, 1 tile */
+                               ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+                               kvm_x86_state_cleanup(state);
+                               break;
+                       case 9:
+                               fprintf(stderr,
+                               "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+                               break;
+                       }
+                       break;
+               case UCALL_DONE:
+                       fprintf(stderr, "UCALL_DONE\n");
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
new file mode 100644 (file)
index 0000000..f8916bb
--- /dev/null
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS.  Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test).  Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 5%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+       const uint32_t tdcr;
+       const uint32_t divide_count;
+} tdcrs[] = {
+       {0x0, 2},
+       {0x1, 4},
+       {0x2, 8},
+       {0x3, 16},
+       {0x8, 32},
+       {0x9, 64},
+       {0xa, 128},
+       {0xb, 1},
+};
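
The hex values above follow the architectural TDCR encoding, in which the divide value is split across bits 0, 1 and 3 of the register. As a purely illustrative sketch (the helper below is made up for this note, not something the test defines), the same table can be derived programmatically:

#include <stdint.h>

/*
 * Sketch of the TDCR divide encoding: bits 0, 1 and 3 form a 3-bit value v,
 * where v = 0..6 means divide-by-2^(v+1) and v = 7 means divide-by-1.
 */
static inline uint32_t tdcr_to_divide_count(uint32_t tdcr)
{
	uint32_t v = (tdcr & 0x3) | ((tdcr & 0x8) >> 1);

	return v == 7 ? 1 : 2u << v;
}
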
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+       if (is_x2apic)
+               x2apic_enable();
+       else
+               xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+       return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+       if (is_x2apic)
+               x2apic_write_reg(reg, val);
+       else
+               xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+       uint64_t tsc_hz = guest_tsc_khz * 1000;
+       const uint32_t tmict = ~0u;
+       uint64_t tsc0, tsc1, freq;
+       uint32_t tmcct;
+       int i;
+
+       apic_enable();
+
+       /*
+        * Setup one-shot timer.  The vector does not matter because the
+        * interrupt should not fire.
+        */
+       apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+       for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+               apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+               apic_write_reg(APIC_TMICT, tmict);
+
+               tsc0 = rdtsc();
+               udelay(delay_ms * 1000);
+               tmcct = apic_read_reg(APIC_TMCCT);
+               tsc1 = rdtsc();
+
+               /*
+                * Stop the timer _after_ reading the current, final count, as
+                * writing the initial counter also modifies the current count.
+                */
+               apic_write_reg(APIC_TMICT, 0);
+
+               freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+               /* Check if measured frequency is within 5% of configured frequency. */
+               __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+                              "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+                              freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+                              apic_hz, tdcrs[i].divide_count, tsc_hz);
+       }
+
+       GUEST_DONE();
+}
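
The guest reconstructs the bus frequency as freq = (tmict - tmcct) * divide_count * tsc_hz / (tsc1 - tsc0). A standalone sketch of that arithmetic with assumed example numbers (25 MHz bus, divide-by-2, roughly a 100 ms delay, a 1 GHz TSC; none of these values come from the test itself) shows how the 5% bounds are applied:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed example values, chosen only to illustrate the formula. */
	const uint64_t apic_hz = 25 * 1000 * 1000;	/* emulated bus: 25 MHz */
	const uint64_t tsc_hz = 1000 * 1000 * 1000;	/* guest TSC: 1 GHz */
	const uint32_t divide_count = 2;		/* TDCR = 0x0 */
	const uint32_t tmict = ~0u;
	const uint64_t tsc_delta = tsc_hz / 10;		/* ~100 ms of TSC cycles */

	/* With a 25 MHz bus and divide-by-2, ~1.25M timer ticks elapse in 100 ms. */
	const uint32_t tmcct = tmict - (uint32_t)(apic_hz / 10 / divide_count);

	/* Same reconstruction the guest performs. */
	uint64_t freq = (uint64_t)(tmict - tmcct) * divide_count * tsc_hz / tsc_delta;

	printf("measured %lu Hz, accepted range [%lu, %lu] Hz\n",
	       (unsigned long)freq,
	       (unsigned long)(apic_hz * 95 / 100),
	       (unsigned long)(apic_hz * 105 / 100));
	return 0;
}
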
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+       bool done = false;
+       struct ucall uc;
+
+       while (!done) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+                       break;
+               }
+       }
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+                                   bool x2apic)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int ret;
+
+       is_x2apic = x2apic;
+
+       vm = vm_create(1);
+
+       sync_global_to_guest(vm, is_x2apic);
+
+       vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+                     NSEC_PER_SEC / apic_hz);
+
+       vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+       vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+       ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+                             NSEC_PER_SEC / apic_hz);
+       TEST_ASSERT(ret < 0 && errno == EINVAL,
+                   "Setting the APIC bus frequency after a vCPU has been created should fail.");
+
+       if (!is_x2apic)
+               virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       test_apic_bus_clock(vcpu);
+       kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+       puts("");
+       printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+       printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       /*
+        * Arbitrarily default to 25MHz for the APIC bus frequency, which is
+        * different enough from the default 1GHz to be interesting.
+        */
+       uint64_t apic_hz = 25 * 1000 * 1000;
+       uint64_t delay_ms = 100;
+       int opt;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+       while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+               switch (opt) {
+               case 'f':
+                       apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+                       break;
+               case 'd':
+                       delay_ms = atoi_positive("Delay in milliseconds", optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       exit(KSFT_SKIP);
+               }
+       }
+
+       run_apic_bus_clock_test(apic_hz, delay_ms, false);
+       run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
new file mode 100644 (file)
index 0000000..7b3fda6
--- /dev/null
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct cpuid_mask {
+       union {
+               struct {
+                       u32 eax;
+                       u32 ebx;
+                       u32 ecx;
+                       u32 edx;
+               };
+               u32 regs[4];
+       };
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+       int i;
+       u32 eax, ebx, ecx, edx;
+
+       for (i = 0; i < guest_cpuid->nent; i++) {
+               __cpuid(guest_cpuid->entries[i].function,
+                       guest_cpuid->entries[i].index,
+                       &eax, &ebx, &ecx, &edx);
+
+               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+       }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+       GUEST_SYNC(1);
+
+       test_guest_cpuids(guest_cpuid);
+
+       GUEST_SYNC(2);
+
+       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+       GUEST_DONE();
+}
+
+static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
+{
+       struct cpuid_mask mask;
+
+       memset(&mask, 0xff, sizeof(mask));
+
+       switch (entry->function) {
+       case 0x1:
+               mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
+               break;
+       case 0x7:
+               mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
+               break;
+       case 0xd:
+               /*
+                * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
+                * XCR0 and IA32_XSS MSR values.
+                */
+               if (entry->index < 2)
+                       mask.ebx = 0;
+               break;
+       }
+       return mask;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+                          const struct kvm_cpuid2 *cpuid2)
+{
+       const struct kvm_cpuid_entry2 *e1, *e2;
+       int i;
+
+       TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+                   "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+       for (i = 0; i < cpuid1->nent; i++) {
+               struct cpuid_mask mask;
+
+               e1 = &cpuid1->entries[i];
+               e2 = &cpuid2->entries[i];
+
+               TEST_ASSERT(e1->function == e2->function &&
+                           e1->index == e2->index && e1->flags == e2->flags,
+                           "CPUID entries[%d] mismatch: 0x%x.%d.%x vs. 0x%x.%d.%x",
+                           i, e1->function, e1->index, e1->flags,
+                           e2->function, e2->index, e2->flags);
+
+               /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
+               mask = get_const_cpuid_mask(e1);
+
+               TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
+                           (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
+                           (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
+                           (e1->edx & mask.edx) == (e2->edx & mask.edx),
+                           "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+                           e1->function, e1->index,
+                           e1->eax & mask.eax, e1->ebx & mask.ebx,
+                           e1->ecx & mask.ecx, e1->edx & mask.edx,
+                           e2->eax & mask.eax, e2->ebx & mask.ebx,
+                           e2->ecx & mask.ecx, e2->edx & mask.edx);
+       }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage + 1,
+                           "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage + 1, (ulong)uc.args[1]);
+               return;
+       case UCALL_DONE:
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_ASSERT(false, "Unexpected exit: %s",
+                           exit_reason_str(vcpu->run->exit_reason));
+       }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+       int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+       vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+       struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+       memcpy(guest_cpuids, cpuid, size);
+
+       *p_gva = gva;
+       return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *ent;
+       int rc;
+       u32 eax, ebx, x;
+
+       /* Setting unmodified CPUID is allowed */
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+       /* Changing CPU features is forbidden */
+       ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+       ebx = ent->ebx;
+       ent->ebx--;
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(rc, "Changing CPU features should fail");
+       ent->ebx = ebx;
+
+       /* Changing MAXPHYADDR is forbidden */
+       ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+       eax = ent->eax;
+       x = eax & 0xff;
+       ent->eax = (eax & ~0xffu) | (x - 1);
+       rc = __vcpu_set_cpuid(vcpu);
+       TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+       ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+       int i, r;
+
+       vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+       TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+                   "KVM didn't update nent on success, wanted %u, got %u",
+                   vcpu->cpuid->nent, cpuid->nent);
+
+       for (i = 0; i < vcpu->cpuid->nent; i++) {
+               cpuid->nent = i;
+               r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+               TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+               TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+       }
+       free(cpuid);
+}
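
The E2BIG behavior exercised above is what userspace relies on when it does not know in advance how many entries KVM will return. A minimal sketch of the usual grow-and-retry pattern using the raw ioctl (vcpu_fd is assumed to be an already-open KVM vCPU file descriptor; error handling is trimmed):

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Double the buffer until KVM stops returning E2BIG; on success KVM updates
 * nent to the number of entries it actually filled in.
 */
static struct kvm_cpuid2 *get_vcpu_cpuid(int vcpu_fd)
{
	struct kvm_cpuid2 *cpuid;
	int nent = 8;

	for (;;) {
		cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
		if (!cpuid)
			return NULL;
		cpuid->nent = nent;
		if (!ioctl(vcpu_fd, KVM_GET_CPUID2, cpuid))
			return cpuid;
		free(cpuid);
		if (errno != E2BIG)
			return NULL;
		nent *= 2;
	}
}
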
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t cpuid_gva;
+       struct kvm_vm *vm;
+       int stage;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+       vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+       vcpu_args_set(vcpu, 1, cpuid_gva);
+
+       for (stage = 0; stage < 3; stage++)
+               run_vcpu(vcpu, stage);
+
+       set_cpuid_after_run(vcpu);
+
+       test_get_cpuid2(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
new file mode 100644 (file)
index 0000000..28cc664
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAGIC_HYPERCALL_PORT   0x80
+
+static void guest_code(void)
+{
+       u32 regs[4] = {
+               [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
+               [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
+       };
+
+       /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
+       GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
+
+       /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+       /*
+        * Notify the hypervisor to clear CR4.OSXSAVE, do CPUID and save the
+        * output, and then restore CR4.  Do this all in assembly to ensure no AVX
+        * instructions are executed while OSXSAVE=0.
+        */
+       asm volatile (
+               "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
+               "cpuid\n\t"
+               "mov %%rdi, %%cr4\n\t"
+               : "+a" (regs[KVM_CPUID_EAX]),
+                 "=b" (regs[KVM_CPUID_EBX]),
+                 "+c" (regs[KVM_CPUID_ECX]),
+                 "=d" (regs[KVM_CPUID_EDX])
+               : "D" (get_cr4())
+       );
+
+       /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
+       GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
+
+       /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_sregs sregs;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
+                   vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
+                       /* emulate hypervisor clearing CR4.OSXSAVE */
+                       vcpu_sregs_get(vcpu, &sregs);
+                       sregs.cr4 &= ~X86_CR4_OSXSAVE;
+                       vcpu_sregs_set(vcpu, &sregs);
+                       continue;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
new file mode 100644 (file)
index 0000000..2d814c1
--- /dev/null
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+#define DR6_BD         (1 << 13)
+#define DR7_GD         (1 << 13)
+
+#define IRQ_VECTOR 0xAA
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+       /* Create a pending interrupt on current vCPU */
+       x2apic_enable();
+       x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+                        APIC_DM_FIXED | IRQ_VECTOR);
+
+       /*
+        * Software BP tests.
+        *
+        * NOTE: sw_bp needs to be placed before the int3 here, because int3 is
+        * an exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
+        * capture it using the vCPU exception bitmap).
+        */
+       asm volatile("sw_bp: int3");
+
+       /* Hardware instruction BP test */
+       asm volatile("hw_bp: nop");
+
+       /* Hardware data BP test */
+       asm volatile("mov $1234,%%rax;\n\t"
+                    "mov %%rax,%0;\n\t write_data:"
+                    : "=m" (guest_value) : : "rax");
+
+       /*
+        * Single step test, covers 2 basic instructions and 2 emulated
+        *
+        * Enable interrupts during the single stepping to see that pending
+        * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
+        *
+        * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
+        * exits to userspace due to single-step being enabled.
+        */
+       asm volatile("ss_start: "
+                    "sti\n\t"
+                    "xor %%eax,%%eax\n\t"
+                    "cpuid\n\t"
+                    "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
+                    "wrmsr\n\t"
+                    "cli\n\t"
+                    : : : "eax", "ebx", "ecx", "edx");
+
+       /* DR6.BD test */
+       asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+       GUEST_DONE();
+}
+
+#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip += insn_len;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+int main(void)
+{
+       struct kvm_guest_debug debug;
+       unsigned long long target_dr6, target_rip;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       uint64_t cmd;
+       int i;
+       /* Instruction lengths starting at ss_start */
+       int ss_size[6] = {
+               1,              /* sti*/
+               2,              /* xor */
+               2,              /* cpuid */
+               5,              /* mov */
+               2,              /* wrmsr */
+               1,              /* cli */
+       };
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       /* Test software BPs - int3 */
+       memset(&debug, 0, sizeof(debug));
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+       vcpu_guest_debug_set(vcpu, &debug);
+       vcpu_run(vcpu);
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                   run->debug.arch.exception == BP_VECTOR &&
+                   run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+                   "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+                   run->exit_reason, run->debug.arch.exception,
+                   run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+       vcpu_skip_insn(vcpu, 1);
+
+       /* Test instruction HW BP over DR[0-3] */
+       for (i = 0; i < 4; i++) {
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+               debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
+               debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               target_dr6 = 0xffff0ff0 | (1UL << i);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+                           run->debug.arch.dr6, target_dr6);
+       }
+       /* Skip "nop" */
+       vcpu_skip_insn(vcpu, 1);
+
+       /* Test data access HW BP over DR[0-3] */
+       for (i = 0; i < 4; i++) {
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+               debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
+               debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+                   (0x000d0000UL << (4*i));
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               target_dr6 = 0xffff0ff0 | (1UL << i);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(write_data),
+                           run->debug.arch.dr6, target_dr6);
+               /* Rollback the 4-bytes "mov" */
+               vcpu_skip_insn(vcpu, -7);
+       }
+       /* Skip the 4-bytes "mov" */
+       vcpu_skip_insn(vcpu, 7);
+
+       /* Test single step */
+       target_rip = CAST_TO_RIP(ss_start);
+       target_dr6 = 0xffff4ff0ULL;
+       for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
+               target_rip += ss_size[i];
+               memset(&debug, 0, sizeof(debug));
+               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+                               KVM_GUESTDBG_BLOCKIRQ;
+               debug.arch.debugreg[7] = 0x00000400;
+               vcpu_guest_debug_set(vcpu, &debug);
+               vcpu_run(vcpu);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                           run->debug.arch.exception == DB_VECTOR &&
+                           run->debug.arch.pc == target_rip &&
+                           run->debug.arch.dr6 == target_dr6,
+                           "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           i, run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+                           target_dr6);
+       }
+
+       /* Finally test global disable */
+       memset(&debug, 0, sizeof(debug));
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+       debug.arch.debugreg[7] = 0x400 | DR7_GD;
+       vcpu_guest_debug_set(vcpu, &debug);
+       vcpu_run(vcpu);
+       target_dr6 = 0xffff0ff0 | DR6_BD;
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+                   run->debug.arch.exception == DB_VECTOR &&
+                   run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+                   run->debug.arch.dr6 == target_dr6,
+                           "DR7.GD: exit %d exception %d rip 0x%llx "
+                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+                           run->exit_reason, run->debug.arch.exception,
+                           run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
+                           target_dr6);
+
+       /* Disable all debug controls, run to the end */
+       memset(&debug, 0, sizeof(debug));
+       vcpu_guest_debug_set(vcpu, &debug);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       cmd = get_ucall(vcpu, &uc);
+       TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
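
The DR7 values built inline above pack a per-slot global-enable bit plus per-slot R/W and LEN fields that start at bit 16, on top of the reserved must-be-one bit 10 (0x400). The sketch below spells out the data-write case with assumed helper macros; the names are illustrative and not defined by the test:

#include <stdint.h>

#define DR7_GE_BIT(i)		(1ULL << (2 * (i) + 1))	/* global enable, slot i */
#define DR7_RW_WRITE		0x1			/* break on data writes */
#define DR7_LEN_4BYTES		0x3			/* 4-byte wide breakpoint */
#define DR7_RW_LEN(i, rw, len)	((uint64_t)((len) << 2 | (rw)) << (16 + 4 * (i)))

/* Equivalent to 0x400 | (1UL << (2*i+1)) | (0x000d0000UL << (4*i)) above. */
static inline uint64_t dr7_data_write_bp(int i)
{
	return 0x400 | DR7_GE_BIT(i) | DR7_RW_LEN(i, DR7_RW_WRITE, DR7_LEN_4BYTES);
}
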
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
new file mode 100644 (file)
index 0000000..2929c06
--- /dev/null
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "ucall_common.h"
+
+#define VCPUS          2
+#define SLOTS          2
+#define ITERATIONS     2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+       uint64_t pages_4k;
+       uint64_t pages_2m;
+       uint64_t pages_1g;
+       uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+       stats->pages_4k = vm_get_stat(vm, "pages_4k");
+       stats->pages_2m = vm_get_stat(vm, "pages_2m");
+       stats->pages_1g = vm_get_stat(vm, "pages_1g");
+       stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+       pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+                stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+                stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+       int i;
+
+       iteration++;
+       for (i = 0; i < VCPUS; i++) {
+               while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+                      iteration)
+                       ;
+       }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+       struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+       int vcpu_idx = vcpu_args->vcpu_idx;
+
+       while (!READ_ONCE(host_quit)) {
+               int current_iteration = READ_ONCE(iteration);
+
+               vcpu_run(vcpu);
+
+               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+               vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+               /* Wait for the start of the next iteration to be signaled. */
+               while (current_iteration == READ_ONCE(iteration) &&
+                      READ_ONCE(iteration) >= 0 &&
+                      !READ_ONCE(host_quit))
+                       ;
+       }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+       struct kvm_vm *vm;
+       unsigned long **bitmaps;
+       uint64_t guest_num_pages;
+       uint64_t host_num_pages;
+       uint64_t pages_per_slot;
+       int i;
+       struct kvm_page_stats stats_populated;
+       struct kvm_page_stats stats_dirty_logging_enabled;
+       struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+       struct kvm_page_stats stats_clear_pass[ITERATIONS];
+       struct kvm_page_stats stats_dirty_logging_disabled;
+       struct kvm_page_stats stats_repopulated;
+
+       vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+                                SLOTS, backing_src, false);
+
+       guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+       host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+       pages_per_slot = host_num_pages / SLOTS;
+       TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+       TEST_ASSERT(!(host_num_pages % 512),
+                   "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+       bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+       if (dirty_log_manual_caps)
+               vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+                             dirty_log_manual_caps);
+
+       /* Start the iterations */
+       iteration = -1;
+       host_quit = false;
+
+       for (i = 0; i < VCPUS; i++)
+               vcpu_last_completed_iteration[i] = -1;
+
+       memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+       run_vcpu_iteration(vm);
+       get_page_stats(vm, &stats_populated, "populating memory");
+
+       /* Enable dirty logging */
+       memstress_enable_dirty_logging(vm, SLOTS);
+
+       get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+       while (iteration < ITERATIONS) {
+               run_vcpu_iteration(vm);
+               get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+                              "dirtying memory");
+
+               memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+               if (dirty_log_manual_caps) {
+                       memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+                       get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+               }
+       }
+
+       /* Disable dirty logging */
+       memstress_disable_dirty_logging(vm, SLOTS);
+
+       get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+       /* Run vCPUs again to fault pages back in. */
+       run_vcpu_iteration(vm);
+       get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+       /*
+        * Tell the vCPU threads to quit.  No need to manually check that vCPUs
+        * have stopped running after disabling dirty logging, the join will
+        * wait for them to exit.
+        */
+       host_quit = true;
+       memstress_join_vcpu_threads(VCPUS);
+
+       memstress_free_bitmaps(bitmaps, SLOTS);
+       memstress_destroy_vm(vm);
+
+       TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+                       stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+       /*
+        * Check that all huge pages were split. Since large pages can only
+        * exist in the data slot, and the vCPUs should have dirtied all pages
+        * in the data slot, there should be no huge pages left after splitting.
+        * Splitting happens at dirty log enable time without
+        * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+        * with that capability.
+        */
+       if (dirty_log_manual_caps) {
+               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+               TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_clear_pass[0].pages_4k);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+       } else {
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+               TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
+                           host_num_pages, stats_dirty_logging_enabled.pages_4k);
+       }
+
+       /*
+        * Once dirty logging is disabled and the vCPUs have touched all their
+        * memory again, the hugepage counts should be the same as they were
+        * right after initial population of memory.
+        */
+       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
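
The multipliers in the assertions above convert huge-page counts into 4 KiB-page equivalents: a 2 MiB page covers 512 base pages and a 1 GiB page covers 512 * 512 of them, and the populated-stats check expects every page to be huge after initial population with HugeTLB backing. A trivial helper form of the same bookkeeping, purely for illustration:

#include <stdint.h>

/*
 * Total mapped memory expressed in 4 KiB-page units, given per-size counts
 * like the kvm_page_stats fields above (assumed helper, not from the test).
 */
static inline uint64_t total_pages_4k_equiv(uint64_t p4k, uint64_t p2m, uint64_t p1g)
{
	return p4k + p2m * 512 + p1g * 512 * 512;
}
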
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+              name);
+       puts("");
+       printf(" -b: specify the size of the memory region which should be\n"
+              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+              "     (default: 1G)\n");
+       backing_src_help("-s");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       int opt;
+
+       TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+       TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+       while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+               switch (opt) {
+               case 'b':
+                       guest_percpu_mem_size = parse_size(optarg);
+                       break;
+               case 'h':
+                       help(argv[0]);
+                       exit(0);
+               case 's':
+                       backing_src = parse_backing_src_type(optarg);
+                       break;
+               default:
+                       help(argv[0]);
+                       exit(1);
+               }
+       }
+
+       if (!is_backing_src_hugetlb(backing_src)) {
+               pr_info("This test will only work reliably with HugeTLB memory. "
+                       "It can work with THP, but that is best effort.\n");
+       }
+
+       guest_modes_append_default();
+
+       dirty_log_manual_caps = 0;
+       for_each_guest_mode(run_test, NULL);
+
+       dirty_log_manual_caps =
+               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+       if (dirty_log_manual_caps) {
+               dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+                                         KVM_DIRTY_LOG_INITIALLY_SET);
+               for_each_guest_mode(run_test, NULL);
+       } else {
+               pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
new file mode 100644 (file)
index 0000000..8105547
--- /dev/null
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+#include "flds_emulation.h"
+#include "test_util.h"
+#include "ucall_common.h"
+
+#define MMIO_GPA       0x700000000
+#define MMIO_GVA       MMIO_GPA
+
+static void guest_code(void)
+{
+       /* Execute flds with an MMIO address to force KVM to emulate it. */
+       flds(MMIO_GVA);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
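+       /*
+        * Map the MMIO GVA to a GPA that is not backed by a memslot
+        * (assuming 0x700000000 lies outside the memslots created by
+        * vm_create_with_one_vcpu()), so the guest's flds access is handled
+        * as MMIO and forces KVM to emulate the unsupported instruction.
+        */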
+       virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+       vcpu_run(vcpu);
+       handle_flds_emulation_failure_exit(vcpu);
+       vcpu_run(vcpu);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
new file mode 100644 (file)
index 0000000..a72f13a
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static bool is_kvm_controlled_msr(uint32_t msr)
+{
+       return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
+}
+
+/*
+ * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
+ * MSR, and doesn't allow setting the hidden version.
+ */
+static bool is_hidden_vmx_msr(uint32_t msr)
+{
+       switch (msr) {
+       case MSR_IA32_VMX_PINBASED_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS:
+       case MSR_IA32_VMX_EXIT_CTLS:
+       case MSR_IA32_VMX_ENTRY_CTLS:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool is_quirked_msr(uint32_t msr)
+{
+       return msr != MSR_AMD64_DE_CFG;
+}
+
+static void test_feature_msr(uint32_t msr)
+{
+       const uint64_t supported_mask = kvm_get_feature_msr(msr);
+       uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /*
+        * Don't bother testing KVM-controlled MSRs beyond verifying that the
+        * MSR can be read from userspace.  Any value is effectively legal, as
+        * KVM is bound by x86 architecture, not by ABI.
+        */
+       if (is_kvm_controlled_msr(msr))
+               return;
+
+       /*
+        * More goofy behavior.  KVM reports the host CPU's actual revision ID,
+        * but initializes the vCPU's revision ID to an arbitrary value.
+        */
+       if (msr == MSR_IA32_UCODE_REV)
+               reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
+
+       /*
+        * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
+        * full set of features supported by KVM.  For non-quirked MSRs, and
+        * when the quirk is disabled, KVM must zero-initialize the MSR and let
+        * userspace do the configuration.
+        */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
+                   "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
+                   reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
+                   vcpu_get_msr(vcpu, msr));
+       if (!is_hidden_vmx_msr(msr))
+               vcpu_set_msr(vcpu, msr, supported_mask);
+       kvm_vm_free(vm);
+
+       if (is_hidden_vmx_msr(msr))
+               return;
+
+       if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
+           !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+               return;
+
+       vm = vm_create(1);
+       vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
+
+       vcpu = vm_vcpu_add(vm, 0, NULL);
+       TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
+                   "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
+                   msr, vcpu_get_msr(vcpu, msr));
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       const struct kvm_msr_list *feature_list;
+       int i;
+
+       /*
+        * Skip the entire test if MSR_FEATURES isn't supported; other tests
+        * will cover the "regular" list of MSRs, and the coverage here is
+        * purely opportunistic and not interesting on its own.
+        */
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+       (void)kvm_get_msr_index_list();
+
+       feature_list = kvm_get_feature_msr_index_list();
+       for (i = 0; i < feature_list->nmsrs; i++)
+               test_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
new file mode 100644 (file)
index 0000000..762628f
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE    3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       regs->rax = -EFAULT;
+       regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
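+/*
+ * The hypercall "instruction" starts life as three int3 (0xcc) bytes; it is
+ * overwritten with the non-native VMCALL/VMMCALL opcode by guest_main()
+ * before do_sched_yield() executes it.
+ */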
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+       uint64_t ret;
+
+       asm volatile("hypercall_insn:\n\t"
+                    ".byte 0xcc,0xcc,0xcc\n\t"
+                    : "=a"(ret)
+                    : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+                    : "memory");
+
+       return ret;
+}
+
+static void guest_main(void)
+{
+       const uint8_t *native_hypercall_insn;
+       const uint8_t *other_hypercall_insn;
+       uint64_t ret;
+
+       if (host_cpu_is_intel) {
+               native_hypercall_insn = vmx_vmcall;
+               other_hypercall_insn  = svm_vmmcall;
+       } else if (host_cpu_is_amd) {
+               native_hypercall_insn = svm_vmmcall;
+               other_hypercall_insn  = vmx_vmcall;
+       } else {
+               GUEST_ASSERT(0);
+               /* unreachable */
+               return;
+       }
+
+       memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+       ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+       /*
+        * If the quirk is disabled, verify that guest_ud_handler() "returned"
+        * -EFAULT and that KVM did NOT patch the hypercall.  If the quirk is
+        * enabled, verify that the hypercall succeeded and that KVM patched in
+        * the "right" hypercall.
+        */
+       if (quirk_disabled) {
+               GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+               GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+                            HYPERCALL_INSN_SIZE));
+       } else {
+               GUEST_ASSERT(!ret);
+               GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+                            HYPERCALL_INSN_SIZE));
+       }
+
+       GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+               break;
+       case UCALL_DONE:
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+                         uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+       }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+       struct kvm_vm *vm = vcpu->vm;
+
+       vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+       if (disable_quirk)
+               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+                             KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+       quirk_disabled = disable_quirk;
+       sync_global_to_guest(vm, quirk_disabled);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
new file mode 100644 (file)
index 0000000..37b1a9f
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
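+/*
+ * Opcode 0xd9 /0 with a ModRM byte of 0x00, i.e. an flds load through
+ * RAX/EAX; handle_flds_emulation_failure_exit() checks for these exact bytes.
+ */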
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+       __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+       uint8_t *insn_bytes;
+       uint64_t flags;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+                   "Unexpected suberror: %u",
+                   run->emulation_failure.suberror);
+
+       flags = run->emulation_failure.flags;
+       TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+                   flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+                   "run->emulation_failure is missing instruction bytes");
+
+       TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+                   "Expected a 2-byte opcode for 'flds', got %d bytes",
+                   run->emulation_failure.insn_size);
+
+       insn_bytes = run->emulation_failure.insn_bytes;
+       TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+                   insn_bytes[0], insn_bytes[1]);
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip += 2;
+       vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
new file mode 100644 (file)
index 0000000..10b1b0b
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
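+       /*
+        * Bits KVM silently ignores on writes vs. bits it accepts and
+        * reflects back on reads.  The names are an assumption based on the
+        * APM and KVM's MSR_K7_HWCR handling: bits 3 (reserved), 6 (FFDIS)
+        * and 8 (IGNNE) are ignored, while bits 18 (McStatusWrEn) and
+        * 24 (TscFreqSel) are valid.
+        */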
+       const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+       const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+       const uint64_t legal = ignored | valid;
+       uint64_t val = BIT_ULL(bit);
+       uint64_t actual;
+       int r;
+
+       r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+       TEST_ASSERT(val & ~legal ? !r : r == 1,
+                   "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+                   val, val & ~legal ? "fail" : "succeed");
+
+       actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+       TEST_ASSERT(actual == (val & valid),
+                   "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+                   bit, actual, (val & valid));
+
+       vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       unsigned int bit;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       for (bit = 0; bit < BITS_PER_LONG; bit++)
+               test_hwcr_bit(vcpu, bit);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
new file mode 100644 (file)
index 0000000..e058bc6
--- /dev/null
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+       volatile u32 tsc_sequence;
+       u32 reserved1;
+       volatile u64 tsc_scale;
+       volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
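+/*
+ * Computes (a * b) >> 64 from 32-bit partial products.  The a_lo * b_lo term
+ * and the carries out of the low 64 bits are dropped, so the result can be
+ * short by a count or two; that is fine for the coarse tolerances used here.
+ */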
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+       union {
+               u64 ll;
+               struct {
+                       u32 low, high;
+               } l;
+       } rm, rn, rh, a0, b0;
+       u64 c;
+
+       a0.ll = a;
+       b0.ll = b;
+
+       rm.ll = (u64)a0.l.low * b0.l.high;
+       rn.ll = (u64)a0.l.high * b0.l.low;
+       rh.ll = (u64)a0.l.high * b0.l.high;
+
+       rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+       rh.l.high = (c >> 32) + rh.l.high;
+
+       return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+       int i;
+
+       for (i = 0; i < 100000000; i++)
+               asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+       GUEST_ASSERT(tsc_freq > 0);
+
+       /* For increased accuracy, average rdtsc() before and after the rdmsr() */
+       r1 = rdtsc();
+       t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       r1 = (r1 + rdtsc()) / 2;
+       nop_loop();
+       r2 = rdtsc();
+       t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       r2 = (r2 + rdtsc()) / 2;
+
+       GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
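+       /*
+        * delta_ns is the difference between the elapsed time per the MSR,
+        * (t2 - t1) * 100 ns, and the elapsed time derived from the raw TSC,
+        * (r2 - r1) * 1e9 / tsc_freq ns; the two should agree to within 1%.
+        */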
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
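+/*
+ * TSC page reference time in 100ns units, per the Hyper-V TLFS:
+ * ReferenceTime = ((VirtualTsc * TscScale) >> 64) + TscOffset.
+ */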
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+       return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+       u64 r1, r2, t1, t2;
+
+       /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+       t1 = get_tscpage_ts(tsc_page);
+       r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+       /* 10 ms tolerance */
+       GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+       nop_loop();
+
+       t2 = get_tscpage_ts(tsc_page);
+       r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+       u64 tsc_scale, tsc_offset;
+
+       /* Set Guest OS id to enable Hyper-V emulation */
+       GUEST_SYNC(1);
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       GUEST_SYNC(2);
+
+       check_tsc_msr_rdtsc();
+
+       GUEST_SYNC(3);
+
+       /* Set up the TSC page in disabled state and check that it's clean */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+       GUEST_SYNC(4);
+
+       /* Set up the TSC page in enabled state */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+       GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+       GUEST_SYNC(5);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       GUEST_SYNC(6);
+
+       tsc_offset = tsc_page->tsc_offset;
+       /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+       GUEST_SYNC(7);
+       /* Sanity check TSC page timestamp, it should be close to 0 */
+       GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+       GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+       nop_loop();
+
+       /*
+        * Enable Re-enlightenment and check that TSC page stays constant across
+        * KVM_SET_CLOCK.
+        */
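+       /* Assumed layout: bits 7:0 = reenlightenment vector (0xff), bit 16 = enabled. */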
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+       tsc_offset = tsc_page->tsc_offset;
+       tsc_scale = tsc_page->tsc_scale;
+       GUEST_SYNC(8);
+       GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+       GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+       GUEST_SYNC(9);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       /*
+        * Disable re-enlightenment and TSC page, check that KVM doesn't update
+        * it anymore.
+        */
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+       memset(tsc_page, 0, sizeof(*tsc_page));
+
+       GUEST_SYNC(10);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+       GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+       TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+       /* For increased accuracy, average rdtsc() before and after the ioctl */
+       r1 = rdtsc();
+       t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+       r1 = (r1 + rdtsc()) / 2;
+       nop_loop();
+       r2 = rdtsc();
+       t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+       r2 = (r2 + rdtsc()) / 2;
+
+       TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+                   "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+                   (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       vm_vaddr_t tsc_page_gva;
+       int stage;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       vcpu_set_hv_cpuid(vcpu);
+
+       tsc_page_gva = vm_vaddr_alloc_page(vm);
+       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+       TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+               "TSC page has to be page aligned");
+       vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+       host_check_tsc_msr_rdtsc(vcpu);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       /* Keep in sync with guest_main() */
+                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+                                   stage);
+                       goto out;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage,
+                           "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               /* Reset kvmclock triggering TSC page update */
+               if (stage == 7 || stage == 8 || stage == 10) {
+                       struct kvm_clock_data clock = {0};
+
+                       vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+               }
+       }
+
+out:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
new file mode 100644 (file)
index 0000000..4f5881d
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_HYPERV_CPUID
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+static void guest_code(void)
+{
+}
+
+static bool smt_possible(void)
+{
+       char buf[16];
+       FILE *f;
+       bool res = true;
+
+       f = fopen("/sys/devices/system/cpu/smt/control", "r");
+       if (f) {
+               if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+                       if (!strncmp(buf, "forceoff", 8) ||
+                           !strncmp(buf, "notsupported", 12))
+                               res = false;
+               }
+               fclose(f);
+       }
+
+       return res;
+}
+
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+                         bool evmcs_expected)
+{
+       int i;
+       int nent_expected = 10;
+       u32 test_val;
+
+       TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
+                   "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+                   " (returned %d)",
+                   nent_expected, hv_cpuid_entries->nent);
+
+       for (i = 0; i < hv_cpuid_entries->nent; i++) {
+               const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+
+               TEST_ASSERT((entry->function >= 0x40000000) &&
+                           (entry->function <= 0x40000082),
+                           "function %x is out of the supported range",
+                           entry->function);
+
+               TEST_ASSERT(entry->index == 0,
+                           ".index field should be zero");
+
+               TEST_ASSERT(entry->flags == 0,
+                           ".flags field should be zero");
+
+               TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
+                           !entry->padding[2], "padding should be zero");
+
+               switch (entry->function) {
+               case 0x40000000:
+                       test_val = 0x40000082;
+
+                       TEST_ASSERT(entry->eax == test_val,
+                                   "Wrong max leaf report in 0x40000000.EAX: %x"
+                                   " (evmcs=%d)",
+                                   entry->eax, evmcs_expected
+                               );
+                       break;
+               case 0x40000004:
+                       test_val = entry->eax & (1UL << 18);
+
+                       TEST_ASSERT(!!test_val == !smt_possible(),
+                                   "NoNonArchitecturalCoreSharing bit"
+                                   " doesn't reflect SMT setting");
+                       break;
+               case 0x4000000A:
+                       TEST_ASSERT(entry->eax & (1UL << 19),
+                                   "Enlightened MSR-Bitmap should always be supported"
+                                   " 0x4000000A.EAX: %x", entry->eax);
+                       if (evmcs_expected)
+                               TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+                                   "Supported Enlightened VMCS version range is supposed to be 1:1"
+                                   " 0x4000000A.EAX: %x", entry->eax);
+
+                       break;
+               default:
+                       break;
+
+               }
+               /*
+                * If needed for debug:
+                * fprintf(stdout,
+                *      "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
+                *      entry->function, entry->eax, entry->ebx, entry->ecx,
+                *      entry->edx);
+                */
+       }
+}
+
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+       static struct kvm_cpuid2 cpuid = {.nent = 0};
+       int ret;
+
+       if (vcpu)
+               ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       else
+               ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+
+       TEST_ASSERT(ret == -1 && errno == E2BIG,
+                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+                   " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       const struct kvm_cpuid2 *hv_cpuid_entries;
+       struct kvm_vcpu *vcpu;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Test vCPU ioctl version */
+       test_hv_cpuid_e2big(vm, vcpu);
+
+       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+       test_hv_cpuid(hv_cpuid_entries, false);
+       free((void *)hv_cpuid_entries);
+
+       if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+           !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+               print_skip("Enlightened VMCS is unsupported");
+               goto do_sys;
+       }
+       vcpu_enable_evmcs(vcpu);
+       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+       test_hv_cpuid(hv_cpuid_entries, true);
+       free((void *)hv_cpuid_entries);
+
+do_sys:
+       /* Test system ioctl version */
+       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+               goto out;
+       }
+
+       test_hv_cpuid_e2big(vm, NULL);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+       test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+
+out:
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
new file mode 100644 (file)
index 0000000..74cf196
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       ud_count++;
+       regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+       /* Currently, L1 doesn't preserve GPRs during vmexits. */
+       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                             "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+       u64 unused;
+
+       GUEST_SYNC(7);
+
+       GUEST_SYNC(8);
+
+       /* Forced exit to L1 upon restore */
+       GUEST_SYNC(9);
+
+       vmcall();
+
+       /* MSR-Bitmap tests */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+       vmcall();
+       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+       /* L2 TLB flush tests */
+       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+       rdmsr_from_l2(MSR_FS_BASE);
+       /*
+        * Note: hypercall status (RAX) is not preserved correctly by L1 after
+        * a synthetic vmexit, so use the unchecked version.
+        */
+       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+                          &unused);
+
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+               vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+       x2apic_enable();
+
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+
+       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+       evmcs_enable();
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_SYNC(3);
+       GUEST_ASSERT(load_evmcs(hv_pages));
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       GUEST_SYNC(4);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(5);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+       current_evmcs->revision_id = -1u;
+       GUEST_ASSERT(vmlaunch());
+       current_evmcs->revision_id = EVMCS_VERSION;
+       GUEST_SYNC(6);
+
+       vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+               PIN_BASED_NMI_EXITING);
+
+       /* L2 TLB flush setup */
+       current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+       current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+       current_evmcs->hv_vm_id = 1;
+       current_evmcs->hv_vp_id = 1;
+       current_vp_assist->nested_control.features.directhypercall = 1;
+       *(u32 *)(hv_pages->partition_assist) = 0;
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+       /*
+        * The NMI forces an L2->L1 exit; resume L2 and check that the eVMCS
+        * is up-to-date (RIP points where it should, not at the beginning of
+        * l2_guest_code()).  GUEST_SYNC(9) checks that.
+        */
+       GUEST_ASSERT(!vmresume());
+
+       GUEST_SYNC(10);
+
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       current_evmcs->guest_rip += 3; /* vmcall */
+
+       /* Intercept RDMSR 0xc0000100 */
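+       /*
+        * Offset 0x400 into the VMX MSR bitmap is the read bitmap for the
+        * high MSR range (0xc0000000 - 0xc0001fff); setting bit
+        * (MSR & 0x1fff) intercepts reads of MSR_FS_BASE (0xc0000100).
+        */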
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+               CPU_BASED_USE_MSR_BITMAPS);
+       __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /* Enable enlightened MSR bitmap */
+       current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+       __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+       current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+       GUEST_ASSERT(!vmresume());
+       /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       current_evmcs->guest_rip += 3; /* vmcall */
+
+       /* Now tell KVM we've changed MSR-Bitmap */
+       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+
+       /*
+        * L2 TLB flush test. First VMCALL should be handled directly by L0,
+        * no VMCALL exit expected.
+        */
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+       current_evmcs->guest_rip += 2; /* rdmsr */
+       /* Enable synthetic vmexit */
+       *(u32 *)(hv_pages->partition_assist) = 1;
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_SYNC(11);
+
+       /* Try enlightened vmptrld with an incorrect GPA */
+       evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(ud_count == 1);
+       GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       events.nmi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+       vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+                                       struct kvm_vcpu *vcpu)
+{
+       struct kvm_regs regs1, regs2;
+       struct kvm_x86_state *state;
+
+       state = vcpu_save_state(vcpu);
+       memset(&regs1, 0, sizeof(regs1));
+       vcpu_regs_get(vcpu, &regs1);
+
+       kvm_vm_release(vm);
+
+       /* Restore state in a new VM.  */
+       vcpu = vm_recreate_with_one_vcpu(vm);
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_enable_evmcs(vcpu);
+       vcpu_load_state(vcpu, state);
+       kvm_x86_state_cleanup(state);
+
+       memset(&regs2, 0, sizeof(regs2));
+       vcpu_regs_get(vcpu, &regs2);
+       TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                   "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                   (ulong) regs2.rdi, (ulong) regs2.rsi);
+       return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+       vm_vaddr_t hcall_page;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       hcall_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_enable_evmcs(vcpu);
+
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+       vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+       pr_info("Running L1 which uses EVMCS to run L2\n");
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               vcpu = save_restore_vm(vm, vcpu);
+
+               /* Force immediate L2->L1 exit before resuming */
+               if (stage == 8) {
+                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+                       inject_nmi(vcpu);
+               }
+
+               /*
+                * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+                * restored VM (before the first KVM_RUN) to check that
+                * KVM_STATE_NESTED_EVMCS is not lost.
+                */
+               if (stage == 9) {
+                       pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+                       vcpu = save_restore_vm(vm, vcpu);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
new file mode 100644 (file)
index 0000000..949e08e
--- /dev/null
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+                      vm_vaddr_t out_pg_gva)
+{
+       uint64_t *output_gva;
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+       output_gva = (uint64_t *)out_pg_gva;
+
+       hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+       /* TLFS states output will be a uint64_t value */
+       GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+       GUEST_DONE();
+}
+
+int main(void)
+{
+       vm_vaddr_t hcall_out_page;
+       vm_vaddr_t hcall_in_page;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       uint64_t *outval;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+       /* Verify if extended hypercalls are supported */
+       if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+                          HV_ENABLE_EXTENDED_HYPERCALLS)) {
+               print_skip("Extended calls not supported by the kernel");
+               exit(KSFT_SKIP);
+       }
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+       vcpu_set_hv_cpuid(vcpu);
+
+       /* Hypercall input */
+       hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+       /* Hypercall output */
+       hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+       vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+                     addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+       *outval = EXT_CAPABILITIES;
+       run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
new file mode 100644 (file)
index 0000000..068e9c6
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST            \
+       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+       uint32_t idx;
+       bool fault_expected;
+       bool write;
+       u64 write_val;
+};
+
+struct hcall_data {
+       uint64_t control;
+       uint64_t expect;
+       bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+       return msr == HV_X64_MSR_EOI;
+}
+
+static void guest_msr(struct msr_data *msr)
+{
+       uint8_t vector = 0;
+       uint64_t msr_val = 0;
+
+       GUEST_ASSERT(msr->idx);
+
+       if (msr->write)
+               vector = wrmsr_safe(msr->idx, msr->write_val);
+
+       if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+               vector = rdmsr_safe(msr->idx, &msr_val);
+
+       if (msr->fault_expected)
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
+       else
+               __GUEST_ASSERT(!vector,
+                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
+
+       if (vector || is_write_only_msr(msr->idx))
+               goto done;
+
+       if (msr->write)
+               __GUEST_ASSERT(!vector,
+                              "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+                              msr->idx, msr->write_val, msr_val);
+
+       /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+       if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+               if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+               else
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+                                    !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+       }
+
+done:
+       GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+       u64 res, input, output;
+       uint8_t vector;
+
+       GUEST_ASSERT_NE(hcall->control, 0);
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
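+       /*
+        * Non-fast hypercalls pass input/output by GPA; use the hypercall
+        * page for input and the page immediately after it for output.  Fast
+        * hypercalls pass their data in registers, so no pages are needed.
+        */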
+       if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+               input = pgs_gpa;
+               output = pgs_gpa + 4096;
+       } else {
+               input = output = 0;
+       }
+
+       vector = __hyperv_hypercall(hcall->control, input, output, &res);
+       if (hcall->ud_expected) {
+               __GUEST_ASSERT(vector == UD_VECTOR,
+                              "Expected #UD for control '%lu', got vector '0x%x'",
+                              hcall->control, vector);
+       } else {
+               __GUEST_ASSERT(!vector,
+                              "Expected no exception for control '%lu', got vector '0x%x'",
+                              hcall->control, vector);
+               GUEST_ASSERT_EQ(res, hcall->expect);
+       }
+
+       GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Enable all supported Hyper-V features, then clear the leafs holding
+        * the features that will be tested one by one.
+        */
+       vcpu_set_hv_cpuid(vcpu);
+
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+       struct kvm_cpuid2 *prev_cpuid = NULL;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage = 0;
+       vm_vaddr_t msr_gva;
+       struct msr_data *msr;
+       bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+       while (true) {
+               vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+               msr_gva = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+               msr = addr_gva2hva(vm, msr_gva);
+
+               vcpu_args_set(vcpu, 1, msr_gva);
+               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+               if (!prev_cpuid) {
+                       vcpu_reset_hv_cpuid(vcpu);
+
+                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+               } else {
+                       vcpu_init_cpuid(vcpu, prev_cpuid);
+               }
+
+               /* TODO: Make this entire test easier to maintain. */
+               if (stage >= 21)
+                       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+               switch (stage) {
+               case 0:
+                       /*
+                        * Only available when Hyper-V identification is set
+                        */
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 1:
+                       msr->idx = HV_X64_MSR_HYPERCALL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 2:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+                       /*
+                        * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+                        * HV_X64_MSR_HYPERCALL available.
+                        */
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = true;
+                       msr->write_val = HYPERV_LINUX_OS_ID;
+                       msr->fault_expected = false;
+                       break;
+               case 3:
+                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 4:
+                       msr->idx = HV_X64_MSR_HYPERCALL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+
+               case 5:
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 6:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 7:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_VP_RUNTIME;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 8:
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 9:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 10:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 11:
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 12:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 13:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_VP_INDEX;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 14:
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 15:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 16:
+                       msr->idx = HV_X64_MSR_RESET;
+                       msr->write = true;
+                       /*
+                        * TODO: the test only writes '0' to HV_X64_MSR_RESET
+                        * at the moment, writing some other value there will
+                        * trigger real vCPU reset and the code is not prepared
+                        * to handle it yet.
+                        */
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 17:
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 18:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 19:
+                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 20:
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 21:
+                       /*
+                        * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+                        * capability enabled and guest visible CPUID bit unset.
+                        */
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 22:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 23:
+                       msr->idx = HV_X64_MSR_EOM;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 24:
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 25:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 26:
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+               case 27:
+                       /* Direct mode test */
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 1 << 12;
+                       msr->fault_expected = true;
+                       break;
+               case 28:
+                       vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+                       msr->write = true;
+                       msr->write_val = 1 << 12;
+                       msr->fault_expected = false;
+                       break;
+
+               case 29:
+                       msr->idx = HV_X64_MSR_EOI;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 30:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+                       msr->idx = HV_X64_MSR_EOI;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               case 31:
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 32:
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 33:
+                       /* Read only */
+                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 34:
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 35:
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 36:
+                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+               case 37:
+                       /* Can only write '0' */
+                       msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = true;
+                       break;
+
+               case 38:
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 39:
+                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 40:
+                       msr->idx = HV_X64_MSR_CRASH_P0;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               case 41:
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 42:
+                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 43:
+                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+                       msr->write = true;
+                       msr->write_val = 0;
+                       msr->fault_expected = false;
+                       break;
+
+               case 44:
+                       /* MSR is not available when CPUID feature bit is unset */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = true;
+                       break;
+               case 45:
+                       /* MSR is available when CPUID feature bit is set */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = false;
+                       msr->fault_expected = false;
+                       break;
+               case 46:
+                       /* Writing bits other than 0 is forbidden */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 0xdeadbeef;
+                       msr->fault_expected = true;
+                       break;
+               case 47:
+                       /* Setting bit 0 enables the feature */
+                       if (!has_invtsc)
+                               goto next_stage;
+                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+                       msr->write = true;
+                       msr->write_val = 1;
+                       msr->fault_expected = false;
+                       break;
+
+               default:
+                       kvm_vm_free(vm);
+                       return;
+               }
+
+               vcpu_set_cpuid(vcpu);
+
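+               /* Snapshot the CPUID so the next iteration's fresh vCPU starts from it */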
+               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+               pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+                        msr->idx, msr->write ? "write" : "read");
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+                       return;
+               }
+
+next_stage:
+               stage++;
+               kvm_vm_free(vm);
+       }
+}
+
+static void guest_test_hcalls_access(void)
+{
+       struct kvm_cpuid2 *prev_cpuid = NULL;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage = 0;
+       vm_vaddr_t hcall_page, hcall_params;
+       struct hcall_data *hcall;
+
+       while (true) {
+               vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+               /* Hypercall input/output */
+               hcall_page = vm_vaddr_alloc_pages(vm, 2);
+               memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+               hcall_params = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+               hcall = addr_gva2hva(vm, hcall_params);
+
+               vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
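+               /* Have KVM enforce the guest-visible Hyper-V CPUID when handling hypercalls */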
+
+               if (!prev_cpuid) {
+                       vcpu_reset_hv_cpuid(vcpu);
+
+                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+               } else {
+                       vcpu_init_cpuid(vcpu, prev_cpuid);
+               }
+
+               switch (stage) {
+               case 0:
+                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+                       hcall->control = 0xbeef;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+                       break;
+
+               case 1:
+                       hcall->control = HVCALL_POST_MESSAGE;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 2:
+                       vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+                       hcall->control = HVCALL_POST_MESSAGE;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+
+               case 3:
+                       hcall->control = HVCALL_SIGNAL_EVENT;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 4:
+                       vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+                       hcall->control = HVCALL_SIGNAL_EVENT;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+
+               case 5:
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+                       break;
+               case 6:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 7:
+                       vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
+                       hcall->expect = HV_STATUS_OPERATION_DENIED;
+                       break;
+
+               case 8:
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 9:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 10:
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 11:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 12:
+                       hcall->control = HVCALL_SEND_IPI;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 13:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+                       hcall->control = HVCALL_SEND_IPI;
+                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               case 14:
+                       /* Nothing in 'sparse banks' -> success */
+                       hcall->control = HVCALL_SEND_IPI_EX;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 15:
+                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 16:
+                       vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 17:
+                       /* XMM fast hypercall without HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE -> #UD */
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = true;
+                       break;
+               case 18:
+                       vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = false;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+               case 19:
+                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+                       hcall->expect = HV_STATUS_ACCESS_DENIED;
+                       break;
+               case 20:
+                       vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+                       hcall->expect = HV_STATUS_INVALID_PARAMETER;
+                       break;
+               case 21:
+                       kvm_vm_free(vm);
+                       return;
+               }
+
+               vcpu_set_cpuid(vcpu);
+
+               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+               pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+                       return;
+               }
+
+               stage++;
+               kvm_vm_free(vm);
+       }
+}
+
+int main(void)
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+       pr_info("Testing access to Hyper-V specific MSRs\n");
+       guest_test_msrs_access();
+
+       pr_info("Testing access to Hyper-V hypercalls\n");
+       guest_test_hcalls_access();
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
new file mode 100644 (file)
index 0000000..22c0c12
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
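+/*
+ * The second receiver's VP index is deliberately above 63 so that the 'Ex'
+ * hypercalls have to use more than one sparse bank.
+ */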
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR      0xfe
+
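+/* IPIs received per VP index; receivers store (u64)-1 to signal they are ready */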
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+       u64 format;
+       u64 valid_bank_mask;
+       u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+       HV_GENERIC_SET_SPARSE_4K,
+       HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+       u32 vector;
+       u32 reserved;
+       u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+       u32 vector;
+       u32 reserved;
+       struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+       u32 vcpu_id;
+
+       x2apic_enable();
+       hv_init(pgs_gpa);
+
+       vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+       /* Signal sender vCPU we're ready */
+       ipis_rcvd[vcpu_id] = (u64)-1;
+
+       for (;;)
+               asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+       ipis_rcvd[vcpu_id]++;
+       wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+       int i;
+
+       for (i = 0; i < 100000000; i++)
+               asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+       struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+       struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+       int stage = 1, ipis_expected[2] = {0};
+
+       hv_init(pgs_gpa);
+       GUEST_SYNC(stage++);
+
+       /* Wait for receiver vCPUs to come up */
+       while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+               nop_loop();
+       ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+       /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+       ipi->vector = IPI_VECTOR;
+       ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+       hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+       hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+                        IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+       ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
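+       /* Only bank 1 (VPs 64..127) is valid, so its bits are packed into bank_contents[0] */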
+       ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+       ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+       ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+                        (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+       memset(hcall_page, 0, 4096);
+       ipi_ex->vector = IPI_VECTOR;
+       ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+       hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+       /*
+        * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+        */
+       ipi_ex->vp_set.valid_bank_mask = 0;
+       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+                        IPI_VECTOR, HV_GENERIC_SET_ALL);
+       nop_loop();
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+       GUEST_SYNC(stage++);
+
+       GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+       int old, r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       vcpu_run(vcpu);
+
+       TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu[3];
+       vm_vaddr_t hcall_page;
+       pthread_t threads[2];
+       int stage = 1, r;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+       /* Hypercall input/output */
+       hcall_page = vm_vaddr_alloc_pages(vm, 2);
+       memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+
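+       /* Each receiver's Hyper-V VP index matches its vCPU id so ipis_rcvd[] indexing lines up */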
+       vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+       vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+       vcpu_set_hv_cpuid(vcpu[1]);
+
+       vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+       vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+       vcpu_set_hv_cpuid(vcpu[2]);
+
+       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+       vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_hv_cpuid(vcpu[0]);
+
+       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+       while (true) {
+               vcpu_run(vcpu[0]);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu[0], &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(uc.args[1] == stage,
+                                   "Unexpected stage: %ld (%d expected)",
+                                   uc.args[1], stage);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               stage++;
+       }
+
+done:
+       cancel_join_vcpu_thread(threads[0], vcpu[1]);
+       cancel_join_vcpu_thread(threads[1], vcpu[2]);
+       kvm_vm_free(vm);
+
+       return r;
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
new file mode 100644 (file)
index 0000000..0ddb632
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+       /* Currently, L1 doesn't preserve GPRs during vmexits. */
+       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                             "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+       u64 unused;
+
+       GUEST_SYNC(3);
+       /* Exit to L1 */
+       vmmcall();
+
+       /* MSR-Bitmap tests */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+       vmmcall();
+       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+       GUEST_SYNC(5);
+
+       /* L2 TLB flush tests */
+       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                        HV_HYPERCALL_FAST_BIT, 0x0,
+                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                        HV_FLUSH_ALL_PROCESSORS);
+       rdmsr_from_l2(MSR_FS_BASE);
+       /*
+        * Note: hypercall status (RAX) is not preserved correctly by L1 after
+        * synthetic vmexit, use unchecked version.
+        */
+       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                          HV_HYPERCALL_FAST_BIT, 0x0,
+                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                          HV_FLUSH_ALL_PROCESSORS, &unused);
+
+       /* Done, exit to L1 and never come back.  */
+       vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+                                                   struct hyperv_test_pages *hv_pages,
+                                                   vm_vaddr_t pgs_gpa)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+       struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+       GUEST_SYNC(1);
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+       GUEST_ASSERT(svm->vmcb_gpa);
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* L2 TLB flush setup */
+       hve->partition_assist_page = hv_pages->partition_assist_gpa;
+       hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+       hve->hv_vm_id = 1;
+       hve->hv_vp_id = 1;
+       current_vp_assist->nested_control.features.directhypercall = 1;
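+       /* '0' in the partition assist page: don't request a synthetic vmexit after L2 TLB flush */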
+       *(u32 *)(hv_pages->partition_assist) = 0;
+
+       GUEST_SYNC(2);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(4);
+       vmcb->save.rip += 3;
+
+       /* Intercept RDMSR 0xc0000100 */
+       vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
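+       /*
+        * The second 2K chunk of the MSR permission map (offset 0x800) covers
+        * MSRs 0xc0000000..0xc0001fff, two bits per MSR; bit 2*N intercepts
+        * reads of MSR N.
+        */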
+       __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+       /* Enable enlightened MSR bitmap */
+       hve->hv_enlightenments_control.msr_bitmap = 1;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+       __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+       /* Make sure the HV_VMCB_NESTED_ENLIGHTENMENTS clean bit is set */
+       vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+       run_guest(vmcb, svm->vmcb_gpa);
+       /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       vmcb->save.rip += 3; /* vmcall */
+
+       /* Now tell KVM we've changed MSR-Bitmap */
+       vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+
+
+       /*
+        * L2 TLB flush test. First VMCALL should be handled directly by L0,
+        * no VMCALL exit expected.
+        */
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+       vmcb->save.rip += 2; /* rdmsr */
+       /* Enable synthetic vmexit */
+       *(u32 *)(hv_pages->partition_assist) = 1;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+       GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(6);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+       vm_vaddr_t hcall_page;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int stage;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_set_hv_cpuid(vcpu);
+       vcpu_alloc_svm(vm, &nested_gva);
+       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+       hcall_page = vm_vaddr_alloc_pages(vm, 1);
+       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
+       vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage,
+                           "Stage %d: Unexpected GUEST_SYNC args, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
new file mode 100644 (file)
index 0000000..077cd0e
--- /dev/null
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
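+/*
+ * WORKER_VCPU_ID_2 is deliberately above 63 so that the 'Ex' hypercalls need
+ * a second sparse bank.
+ */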
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+       u64 format;
+       u64 valid_bank_mask;
+       u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+       HV_GENERIC_SET_SPARSE_4K,
+       HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+       u64 address_space;
+       u64 flags;
+       u64 processor_mask;
+       u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+       u64 address_space;
+       u64 flags;
+       struct hv_vpset hv_vp_set;
+       u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+       vm_vaddr_t hcall_gva;
+       vm_paddr_t hcall_gpa;
+       vm_vaddr_t test_pages;
+       vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+       struct test_data *data = (struct test_data *)test_data;
+       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
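+       /* Per-vCPU expected values live in the extra page after the two test pages */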
+       void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+       u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+       u64 expected, val;
+
+       x2apic_enable();
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+       for (;;) {
+               cpu_relax();
+
+               expected = READ_ONCE(*this_cpu);
+
+               /*
+                * Make sure the value in the test page is read after reading
+                * the expectation for the first time. Pairs with wmb() in
+                * prepare_to_test().
+                */
+               rmb();
+
+               val = READ_ONCE(*(u64 *)data->test_pages);
+
+               /*
+                * Make sure the value in the test page is read before reading
+                * the expectation for the second time. Pairs with wmb() in
+                * post_test().
+                */
+               rmb();
+
+               /*
+                * '0' indicates the sender is between iterations, wait until
+                * the sender is ready for this vCPU to start checking again.
+                */
+               if (!expected)
+                       continue;
+
+               /*
+                * Re-read the per-vCPU byte to ensure the sender didn't move
+                * onto a new iteration.
+                */
+               if (expected != READ_ONCE(*this_cpu))
+                       continue;
+
+               GUEST_ASSERT(val == expected);
+       }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+       void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+       *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+       uint64_t tmp = *(uint64_t *)pte_gva1;
+
+       *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+       *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+       int i;
+
+       for (i = 0; i < 1000000; i++)
+               asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+       /* Clear hypercall input page */
+       memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+       /* 'Disable' workers */
+       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+       /* Make sure workers are 'disabled' before we swap PTEs. */
+       wmb();
+
+       /* Make sure workers have enough time to notice */
+       do_delay();
+
+       /* Swap test page mappings */
+       swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check the hypercall result, set the expected values for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+       /* Make sure we change the expectation after swapping PTEs */
+       wmb();
+
+       /* Set the expectation for workers, '0' means don't test */
+       set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+       set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+       /* Make sure workers have enough time to test */
+       do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+       struct test_data *data = (struct test_data *)test_data;
+       struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+       struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+       vm_paddr_t hcall_gpa = data->hcall_gpa;
+       int i, stage = 1;
+
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+       wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+       /* "Slow" hypercalls */
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+                                hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                       HV_FLUSH_ALL_PROCESSORS;
+               flush->processor_mask = 0;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+                                hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                       HV_FLUSH_ALL_PROCESSORS;
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [1] */
+               flush_ex->gva_list[1] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [2] */
+               flush_ex->gva_list[2] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               flush_ex->gva_list[0] = (u64)data->test_pages;
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                hcall_gpa, hcall_gpa + PAGE_SIZE);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       /* "Fast" hypercalls */
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                                HV_HYPERCALL_FAST_BIT, 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+                                HV_HYPERCALL_FAST_BIT, 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                                HV_FLUSH_ALL_PROCESSORS);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush->processor_mask, 1);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+                                HV_FLUSH_ALL_PROCESSORS);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [1] */
+               flush_ex->gva_list[1] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 :
+                         TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
+               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+               /* bank_contents and gva_list occupy the same space, thus [2] */
+               flush_ex->gva_list[2] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+                                HV_HYPERCALL_FAST_BIT,
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_SYNC(stage++);
+
+       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+       for (i = 0; i < NTRY; i++) {
+               prepare_to_test(data);
+               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+               flush_ex->gva_list[0] = (u64)data->test_pages;
+               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+                                HV_HYPERCALL_FAST_BIT |
+                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+                         i % 2 ? TESTVAL1 : TESTVAL2);
+       }
+
+       GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               /* NOT REACHED */
+       default:
+               TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+       }
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu[3];
+       pthread_t threads[2];
+       vm_vaddr_t test_data_page, gva;
+       vm_paddr_t gpa;
+       uint64_t *pte;
+       struct test_data *data;
+       struct ucall uc;
+       int stage = 1, r, i;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+       /* Test data page */
+       test_data_page = vm_vaddr_alloc_page(vm);
+       data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+       /* Hypercall input/output */
+       data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+       data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+       memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+       /*
+        * Test pages: the first one is filled with '0x01's, the second with '0x02's
+        * and the test will swap their mappings. The third page holds the
+        * per-vCPU expected values ('0' means don't check).
+        */
+       data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+       for (i = 0; i < NTEST_PAGES; i++)
+               memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+                      (u8)(i + 1), PAGE_SIZE);
+       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+       /*
+        * Get PTE pointers for test pages and map them inside the guest.
+        * Use a separate page for each PTE for simplicity.
+        */
+       gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+       for (i = 0; i < NTEST_PAGES; i++) {
+               pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+               gpa = addr_hva2gpa(vm, pte);
+               __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+               data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+       }
+
+       /*
+        * Sender vCPU which performs the test: swaps test pages, sets expectation
+        * for 'workers' and issues TLB flush hypercalls.
+        */
+       vcpu_args_set(vcpu[0], 1, test_data_page);
+       vcpu_set_hv_cpuid(vcpu[0]);
+
+       /* Create worker vCPUs which check the contents of the test pages */
+       vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+       vcpu_args_set(vcpu[1], 1, test_data_page);
+       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+       vcpu_set_hv_cpuid(vcpu[1]);
+
+       vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+       vcpu_args_set(vcpu[2], 1, test_data_page);
+       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+       vcpu_set_hv_cpuid(vcpu[2]);
+
+       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+       TEST_ASSERT(!r, "pthread_create() failed");
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+       TEST_ASSERT(!r, "pthread_create() failed");
+
+       while (true) {
+               vcpu_run(vcpu[0]);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu[0], &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(uc.args[1] == stage,
+                                   "Unexpected stage: %ld (%d expected)",
+                                   uc.args[1], stage);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               stage++;
+       }
+
+done:
+       cancel_join_vcpu_thread(threads[0], vcpu[1]);
+       cancel_join_vcpu_thread(threads[1], vcpu[2]);
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
new file mode 100644 (file)
index 0000000..5bc1222
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+       uint64_t kvmclock_base;
+       int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+       { .kvmclock_base = 0 },
+       { .kvmclock_base = 180 * NSEC_PER_SEC },
+       { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+       { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val)                       \
+               GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
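+/*
+ * Enable kvm-clock via MSR_KVM_SYSTEM_TIME_NEW and report the guest's pvclock
+ * reading to the host once per test case.
+ */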
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+       int i;
+
+       wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+               GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+       TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+                   "unexpected clock data flags: %x (want set: %x)",
+                   data->flags, EXPECTED_FLAGS);
+}
+
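+/*
+ * Verify the guest's kvm-clock reading falls within the host's KVM_GET_CLOCK
+ * readings taken immediately before and after running the vCPU.
+ */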
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+                       struct kvm_clock_data *end)
+{
+       uint64_t obs, exp_lo, exp_hi;
+
+       obs = uc->args[2];
+       exp_lo = start->clock;
+       exp_hi = end->clock;
+
+       assert_flags(start);
+       assert_flags(end);
+
+       TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+                   "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+                   obs, exp_lo, exp_hi);
+
+       pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+               obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+       REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+       struct kvm_clock_data data;
+
+       memset(&data, 0, sizeof(data));
+
+       data.clock = test_case->kvmclock_base;
+       if (test_case->realtime_offset) {
+               struct timespec ts;
+               int r;
+
+               data.flags |= KVM_CLOCK_REALTIME;
+               do {
+                       r = clock_gettime(CLOCK_REALTIME, &ts);
+                       if (!r)
+                               break;
+               } while (errno == EINTR);
+
+               TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
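+               /* Express host wall-clock time in ns and apply the test's offset. */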
+               data.realtime = ts.tv_sec * NSEC_PER_SEC;
+               data.realtime += ts.tv_nsec;
+               data.realtime += test_case->realtime_offset;
+       }
+
+       vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct kvm_clock_data start, end;
+       struct kvm_vm *vm = vcpu->vm;
+       struct ucall uc;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+               setup_clock(vm, &test_cases[i]);
+
+               vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+               vcpu_run(vcpu);
+               vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       handle_sync(&uc, &start, &end);
+                       break;
+               case UCALL_ABORT:
+                       handle_abort(&uc);
+                       return;
+               default:
+                       TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
+               }
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t pvti_gva;
+       vm_paddr_t pvti_gpa;
+       struct kvm_vm *vm;
+       int flags;
+
+       flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+       TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+       pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+       vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+       enter_guest(vcpu);
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
new file mode 100644 (file)
index 0000000..78878b3
--- /dev/null
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+       uint32_t idx;
+       const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+       TEST_MSR(MSR_KVM_SYSTEM_TIME),
+       TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+       TEST_MSR(MSR_KVM_WALL_CLOCK),
+       TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+       TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+       TEST_MSR(MSR_KVM_STEAL_TIME),
+       TEST_MSR(MSR_KVM_PV_EOI_EN),
+       TEST_MSR(MSR_KVM_POLL_CONTROL),
+       TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+       TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+       uint64_t ignored;
+       uint8_t vector;
+
+       PR_MSR(msr);
+
+       vector = rdmsr_safe(msr->idx, &ignored);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+       vector = wrmsr_safe(msr->idx, 0);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+       uint64_t nr;
+       const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+       TEST_HCALL(KVM_HC_KICK_CPU),
+       TEST_HCALL(KVM_HC_SEND_IPI),
+       TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+       uint64_t r;
+
+       PR_HCALL(hc);
+       r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+               test_msr(&msrs_to_test[i]);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+               test_hcall(&hcalls_to_test[i]);
+       }
+
+       GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+       struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+       pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+       struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+       pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       while (true) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_PR_MSR:
+                       pr_msr(&uc);
+                       break;
+               case UCALL_PR_HCALL:
+                       pr_hcall(&uc);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       return;
+               case UCALL_DONE:
+                       return;
+               }
+       }
+}
+
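+/*
+ * Verify that enabling KVM_CAP_X86_DISABLE_EXITS (to disable HLT exits) clears
+ * KVM_FEATURE_PV_UNHALT from the guest's CPUID.
+ */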
+static void test_pv_unhalt(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_cpuid_entry2 *ent;
+       u32 kvm_sig_old;
+
+       pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
+
+       /* KVM_PV_UNHALT test */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+       TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+       /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       kvm_sig_old = ent->ebx;
+       ent->ebx = 0xdeadbeef;
+       vcpu_set_cpuid(vcpu);
+
+       vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       ent->ebx = kvm_sig_old;
+       vcpu_set_cpuid(vcpu);
+
+       TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+       /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+       kvm_vm_free(vm);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+       vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+       vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+       enter_guest(vcpu);
+       kvm_vm_free(vm);
+
+       test_pv_unhalt();
+}
diff --git a/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
new file mode 100644 (file)
index 0000000..7e2bfb3
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID    2
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones();
+
+       /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+       ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+       /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+       TEST_ASSERT(ret < 0,
+                   "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+       /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
+       if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+               vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
+
+               /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
+               ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
+               TEST_ASSERT(ret < 0,
+                           "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
+       }
+
+       /* Set KVM_CAP_MAX_VCPU_ID */
+       vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+       /* Try to set KVM_CAP_MAX_VCPU_ID again */
+       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+       TEST_ASSERT(ret < 0,
+                   "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+       /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+       TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+       /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
+       TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
+
+       /* Create vCPU with id within bounds */
+       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
+       TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
+
+       close(ret);
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
new file mode 100644 (file)
index 0000000..2b550ef
--- /dev/null
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+       MWAIT_QUIRK_DISABLED = BIT(0),
+       MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+       MWAIT_DISABLED = BIT(2),
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
+do {                                                                   \
+       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
+                           ((testcase) & MWAIT_DISABLED);              \
+                                                                       \
+       if (fault_wanted)                                               \
+               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
+                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
+                              testcase, vector);                       \
+       else                                                            \
+               __GUEST_ASSERT(!(vector),                               \
+                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
+                              testcase, vector);                       \
+} while (0)
+
+static void guest_monitor_wait(int testcase)
+{
+       u8 vector;
+
+       GUEST_SYNC(testcase);
+
+       /*
+        * Arbitrarily MONITOR this function; SVM performs fault checks before
+        * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+        */
+       vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+       vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+}
+
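+/*
+ * Walk through the quirk/MWAIT combinations, syncing with the host before each
+ * attempt so that it can update the quirks and MISC_ENABLES accordingly.
+ */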
+static void guest_code(void)
+{
+       guest_monitor_wait(MWAIT_DISABLED);
+
+       guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       uint64_t disabled_quirks;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int testcase;
+
+       TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       testcase = uc.args[1];
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       goto done;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+                       goto done;
+               }
+
+               disabled_quirks = 0;
+               if (testcase & MWAIT_QUIRK_DISABLED)
+                       disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+               if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+                       disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+               /*
+                * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+                * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+                * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
+                * the only valid configuration is MWAIT disabled, as CPUID
+                * can't be manually changed after running the vCPU.
+                */
+               if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+                       TEST_ASSERT(testcase & MWAIT_DISABLED,
+                                   "Can't toggle CPUID features after running vCPU");
+                       continue;
+               }
+
+               vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+                            (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
new file mode 100644 (file)
index 0000000..3eb0313
--- /dev/null
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR       0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception.  AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG    BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG    BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS           (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF                (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF     (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+       GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+       GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+       GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+       GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
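+/*
+ * Run L2 at the given entry point and verify the resulting #VMEXIT is the
+ * expected exception intercept with the expected error code.  Triple fault
+ * (SHUTDOWN) is checked by the caller instead.
+ */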
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+                      uint32_t error_code)
+{
+       struct vmcb *vmcb = svm->vmcb;
+       struct vmcb_control_area *ctrl = &vmcb->control;
+
+       vmcb->save.rip = (u64)l2_code;
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+               return;
+
+       GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+       GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+       struct vmcb_control_area *ctrl = &svm->vmcb->control;
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       svm->vmcb->save.idtr.limit = 0;
+       ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+       svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+       svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+       svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+       ctrl->intercept_exceptions = INTERCEPT_SS;
+       svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+       GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+       GUEST_DONE();
+}
+
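+/*
+ * Run L2 at the given entry point, using vmlaunch() for the initial #SS run
+ * and vmresume() thereafter, and verify the exception VM-Exit's vector and
+ * error code.  Triple fault is checked by the caller instead.
+ */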
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+       GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+               return;
+
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+       GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+       prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+       /*
+        * VMX disallows injecting an exception with error_code[31:16] != 0,
+        * and hardware will never generate a VM-Exit with bits 31:16 set.
+        * KVM should likewise truncate the "bad" userspace value.
+        */
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+       vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+       vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+       vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+       vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+       GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+       if (this_cpu_has(X86_FEATURE_SVM))
+               l1_svm_code(test_data);
+       else
+               l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+       struct ucall uc;
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               TEST_ASSERT(vector == uc.args[1],
+                           "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+               break;
+       case UCALL_DONE:
+               TEST_ASSERT(vector == -1,
+                           "Expected L2 to ask for %d, L2 says it's done", vector);
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+       }
+}
+
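+/*
+ * Use KVM_SET_VCPU_EVENTS to queue a #SS with an error code, either as a
+ * pending exception or as an already-injected exception, after verifying no
+ * other exception is in flight.
+ */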
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       TEST_ASSERT(!events.exception.pending,
+                   "Vector %d unexpectedly pending", events.exception.nr);
+       TEST_ASSERT(!events.exception.injected,
+                   "Vector %d unexpectedly injected", events.exception.nr);
+
+       events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+       events.exception.pending = !inject;
+       events.exception.injected = inject;
+       events.exception.nr = SS_VECTOR;
+       events.exception.has_error_code = true;
+       events.exception.error_code = SS_ERROR_CODE;
+       vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2.  Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_test_data_gva;
+       struct kvm_vcpu_events events;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+       if (kvm_cpu_has(X86_FEATURE_SVM))
+               vcpu_alloc_svm(vm, &nested_test_data_gva);
+       else
+               vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+       vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+       /* Run L1 => L2.  L2 should sync and request #SS. */
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, SS_VECTOR);
+
+       /* Pend #SS and request immediate exit.  #SS should still be pending. */
+       queue_ss_exception(vcpu, false);
+       vcpu->run->immediate_exit = true;
+       vcpu_run_complete_io(vcpu);
+
+       /* Verify the pending event comes back out the same as it went in. */
+       vcpu_events_get(vcpu, &events);
+       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+                       KVM_VCPUEVENT_VALID_PAYLOAD);
+       TEST_ASSERT_EQ(events.exception.pending, true);
+       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+       TEST_ASSERT_EQ(events.exception.has_error_code, true);
+       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+       /*
+        * Run for real with the pending #SS, L1 should get a VM-Exit due to
+        * #SS interception and re-enter L2 to request #GP (via injected #SS).
+        */
+       vcpu->run->immediate_exit = false;
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, GP_VECTOR);
+
+       /*
+        * Inject #SS, the #SS should bypass interception and cause #GP, which
+        * L1 should intercept before KVM morphs it to #DF.  L1 should then
+        * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, DF_VECTOR);
+
+       /*
+        * Inject #SS, the #SS should bypass interception and cause #GP, which
+        * L1 is no longer intercepting, and so L1 should see a #DF VM-Exit.  L1
+        * should then signal that it is done.
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+       /*
+        * Inject #SS yet again.  L1 is not intercepting #GP or #DF, and so
+        * should see nested TRIPLE_FAULT / SHUTDOWN.
+        */
+       queue_ss_exception(vcpu, true);
+       vcpu_run(vcpu);
+       assert_ucall_vector(vcpu, -1);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
new file mode 100644 (file)
index 0000000..e7efb2b
--- /dev/null
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT             10
+#define HPAGE_GPA              (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA              HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES      (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+       ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+       uint64_t hpage_1 = HPAGE_GVA;
+       uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+       uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+       READ_ONCE(*(uint64_t *)hpage_1);
+       GUEST_SYNC(1);
+
+       READ_ONCE(*(uint64_t *)hpage_2);
+       GUEST_SYNC(2);
+
+       guest_do_CALL(hpage_1);
+       GUEST_SYNC(3);
+
+       guest_do_CALL(hpage_3);
+       GUEST_SYNC(4);
+
+       READ_ONCE(*(uint64_t *)hpage_1);
+       GUEST_SYNC(5);
+
+       READ_ONCE(*(uint64_t *)hpage_3);
+       GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+       int actual_pages_2m;
+
+       actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+       TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+                   "Unexpected 2m page count. Expected %d, got %d",
+                   expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+       int actual_splits;
+
+       actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+       TEST_ASSERT(actual_splits == expected_splits,
+                   "Unexpected NX huge page split count. Expected %d, got %d",
+                   expected_splits, actual_splits);
+}
+
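+/*
+ * Sleep for 5x the reclaim period to give the NX recovery worker ample time
+ * to run.
+ */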
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+       long reclaim_wait_ms;
+       struct timespec ts;
+
+       reclaim_wait_ms = reclaim_period_ms * 5;
+       ts.tv_sec = reclaim_wait_ms / 1000;
+       ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+       nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+             bool reboot_permissions)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t nr_bytes;
+       void *hva;
+       int r;
+
+       vm = vm_create(1);
+
+       if (disable_nx_huge_pages) {
+               r = __vm_disable_nx_huge_pages(vm);
+               if (reboot_permissions) {
+                       TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+               } else {
+                       TEST_ASSERT(r == -1 && errno == EPERM,
+                                   "This process should not have permission to disable NX huge pages");
+                       return;
+               }
+       }
+
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+                                   HPAGE_GPA, HPAGE_SLOT,
+                                   HPAGE_SLOT_NPAGES, 0);
+
+       nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+       /*
+        * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+        * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+        * whenever KVM is shadowing the guest page tables).
+        *
+        * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+        * pages irrespective of the guest page size, so map with 4KiB pages
+        * to test that that is the case.
+        */
+       if (kvm_is_tdp_enabled())
+               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+       else
+               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+       hva = addr_gpa2hva(vm, HPAGE_GPA);
+       memset(hva, RETURN_OPCODE, nr_bytes);
+
+       check_2m_page_count(vm, 0);
+       check_split_count(vm, 0);
+
+       /*
+        * The guest code will first read from the first hugepage, resulting
+        * in a huge page mapping being created.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, 1);
+       check_split_count(vm, 0);
+
+       /*
+        * Then the guest code will read from the second hugepage, resulting
+        * in another huge page mapping being created.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, 2);
+       check_split_count(vm, 0);
+
+       /*
+        * Next, the guest will execute from the first huge page, causing it
+        * to be remapped at 4k.
+        *
+        * If NX huge pages are disabled, this should have no effect.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+       /*
+        * Executing from the third huge page (previously unaccessed) will
+        * cause part of it to be mapped at 4k.
+        *
+        * If NX huge pages are disabled, it should be mapped at 2M.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+       /* Reading from the first huge page again should have no effect. */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+       /* Give recovery thread time to run. */
+       wait_for_reclaim(reclaim_period_ms);
+
+       /*
+        * Now that the reclaimer has run, all the split pages should be gone.
+        *
+        * If NX huge pages are disabled, the reclaimer will not run, so
+        * nothing should change from here on.
+        */
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+       check_split_count(vm, 0);
+
+       /*
+        * The 4k mapping on hpage 3 should have been removed, so check that
+        * reading from it causes a huge page mapping to be installed.
+        */
+       vcpu_run(vcpu);
+       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+       check_split_count(vm, 0);
+
+       kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
+       puts("");
+       printf(" -p: The NX reclaim period in milliseconds.\n");
+       printf(" -t: The magic token to indicate environment setup is done.\n");
+       printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       int reclaim_period_ms = 0, token = 0, opt;
+       bool reboot_permissions = false;
+
+       while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+               switch (opt) {
+               case 'p':
+                       reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+                       break;
+               case 't':
+                       token = atoi_paranoid(optarg);
+                       break;
+               case 'r':
+                       reboot_permissions = true;
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+       __TEST_REQUIRE(token == MAGIC_TOKEN,
+                      "This test must be run with the magic token via '-t %d'.\n"
+                      "Running via nx_huge_pages_test.sh, which also handles "
+                      "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+       run_test(reclaim_period_ms, false, reboot_permissions);
+       run_test(reclaim_period_ms, true, reboot_permissions);
+
+       return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
new file mode 100755 (executable)
index 0000000..caad084
--- /dev/null
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+       function do_sudo () {
+               "$@"
+       }
+else
+       function do_sudo () {
+               sudo "$@"
+       }
+fi
+
+set +e
+
+function sudo_echo () {
+       echo "$1" | do_sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
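+# Verify that privileged writes work at all; if not, skip the test (KSFT_SKIP).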
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+       set -e
+
+       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+       sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+       sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+       # Test with reboot permissions
+       if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+               echo Running test with CAP_SYS_BOOT enabled
+               $NXECUTABLE -t 887563923 -p 100 -r
+               test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+       else
+               echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+       fi
+
+       # Test without reboot permissions
+       if [ $(whoami) != "root" ] ; then
+               echo Running test with CAP_SYS_BOOT disabled
+               $NXECUTABLE -t 887563923 -p 100
+       else
+               echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+       fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
new file mode 100644 (file)
index 0000000..9cbf283
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
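+/*
+ * Read MSR_PLATFORM_INFO with guest access enabled and verify the turbo ratio
+ * bits, then expect a #GP once the host disables access via the capability.
+ */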
+static void guest_code(void)
+{
+       uint64_t msr_platform_info;
+       uint8_t vector;
+
+       GUEST_SYNC(true);
+       msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+       GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+                       MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+       GUEST_SYNC(false);
+       vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t msr_platform_info;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+       vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+                    msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+
+       for (;;) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+                       break;
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
new file mode 100644 (file)
index 0000000..698cb36
--- /dev/null
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS                      10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED       (NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
+ * 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP             3
+
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disable the
+ * counter.  2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS                        5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED              (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+                                                 void *guest_code,
+                                                 uint8_t pmu_version,
+                                                 uint64_t perf_capabilities)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       sync_global_to_guest(vm, kvm_pmu_version);
+
+       /*
+        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+        */
+       if (kvm_has_perf_caps)
+               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+       return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       pr_info("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+       /*
+        * Return the effective PMU version, i.e. the minimum between what KVM
+        * supports and what is enumerated to the guest.  The host deliberately
+        * advertises a PMU version to the guest beyond what is actually
+        * supported by KVM to verify KVM doesn't freak out and do something
+        * bizarre with an architecturally valid, but unsupported, version.
+        */
+       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit", assert that its count is non-zero.  If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+                                    struct kvm_x86_pmu_feature event,
+                                    uint32_t pmc, uint32_t pmc_msr)
+{
+       uint64_t count;
+
+       count = _rdpmc(pmc);
+       if (!this_pmu_has(event))
+               goto sanity_checks;
+
+       switch (idx) {
+       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_LLC_REFERENCES_INDEX:
+       case INTEL_ARCH_LLC_MISSES_INDEX:
+               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+                   !this_cpu_has(X86_FEATURE_CLFLUSH))
+                       break;
+               fallthrough;
+       case INTEL_ARCH_CPU_CYCLES_INDEX:
+       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+               GUEST_ASSERT_NE(count, 0);
+               break;
+       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+               break;
+       default:
+               break;
+       }
+
+sanity_checks:
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+       wrmsr(pmc_msr, 0xdead);
+       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence.  Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
+do {                                                                           \
+       __asm__ __volatile__("wrmsr\n\t"                                        \
+                            " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"      \
+                            "1:\n\t"                                           \
+                            clflush "\n\t"                                     \
+                            "mfence\n\t"                                       \
+                            FEP "loop 1b\n\t"                                  \
+                            FEP "mov %%edi, %%ecx\n\t"                         \
+                            FEP "xor %%eax, %%eax\n\t"                         \
+                            FEP "xor %%edx, %%edx\n\t"                         \
+                            "wrmsr\n\t"                                        \
+                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
+                               "c"(_msr), "D"(_msr)                            \
+       );                                                                      \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do {                                                                           \
+       wrmsr(pmc_msr, 0);                                                      \
+                                                                               \
+       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);    \
+       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);       \
+       else                                                                    \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
+                                                                               \
+       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+                                   uint32_t pmc, uint32_t pmc_msr,
+                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+       if (is_forced_emulation_enabled)
+               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL                                           \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {};                        \
+                                                                       \
+       feature;                                                        \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+       return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+       const struct {
+               struct kvm_x86_pmu_feature gp_event;
+               struct kvm_x86_pmu_feature fixed_event;
+       } intel_event_to_feature[] = {
+               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+               /*
+                * Note, the fixed counter for reference cycles is NOT the same
+                * as the general purpose architectural event.  The fixed counter
+                * explicitly counts at the same frequency as the TSC, whereas
+                * the GP event counts at a fixed, but uarch specific, frequency.
+                * Bundle them here for simplicity.
+                */
+               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+       };
+
+       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint32_t pmu_version = guest_get_pmu_version();
+       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+       bool guest_has_perf_global_ctrl = pmu_version >= 2;
+       struct kvm_x86_pmu_feature gp_event, fixed_event;
+       uint32_t base_pmc_msr;
+       unsigned int i;
+
+       /* The host side shouldn't invoke this without a guest PMU. */
+       GUEST_ASSERT(pmu_version);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_pmc_msr = MSR_IA32_PMC0;
+       else
+               base_pmc_msr = MSR_IA32_PERFCTR0;
+
+       gp_event = intel_event_to_feature[idx].gp_event;
+       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+       GUEST_ASSERT(nr_gp_counters);
+
+       for (i = 0; i < nr_gp_counters; i++) {
+               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+                                   ARCH_PERFMON_EVENTSEL_ENABLE |
+                                   intel_pmu_arch_events[idx];
+
+               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+               if (guest_has_perf_global_ctrl)
+                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+                                       MSR_P6_EVNTSEL0 + i, eventsel);
+       }
+
+       if (!guest_has_perf_global_ctrl)
+               return;
+
+       fixed_event = intel_event_to_feature[idx].fixed_event;
+       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+               return;
+
+       i = fixed_event.f.bit;
+
+       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+                               MSR_CORE_PERF_FIXED_CTR0 + i,
+                               MSR_CORE_PERF_GLOBAL_CTRL,
+                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+       uint8_t i;
+
+       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+               guest_test_arch_event(i);
+
+       GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t length, uint8_t unavailable_mask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Testing arch events requires a vPMU (there are no negative tests). */
+       if (!pmu_version)
+               return;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+                               length);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+                               unavailable_mask);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
+ * beyond the defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS     8
+#define MAX_NR_FIXED_COUNTERS  3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
+              "Expected %s on " #insn "(0x%x), got vector %u",                 \
+              expect_gp ? "#GP" : "no fault", msr, vector)
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val)                   \
+       __GUEST_ASSERT(val == expected_val,                                     \
+                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
+                      msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+                            uint64_t expected_val)
+{
+       uint8_t vector;
+       uint64_t val;
+
+       vector = rdpmc_safe(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+       if (!is_forced_emulation_enabled)
+               return;
+
+       vector = rdpmc_safe_fep(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+                                uint8_t nr_counters, uint32_t or_mask)
+{
+       const bool pmu_has_fast_mode = !guest_get_pmu_version();
+       uint8_t i;
+
+       for (i = 0; i < nr_possible_counters; i++) {
+               /*
+                * TODO: Test a value that validates full-width writes and the
+                * width of the counters.
+                */
+               const uint64_t test_val = 0xffff;
+               const uint32_t msr = base_msr + i;
+
+               /*
+                * Fixed counters are supported if the counter is less than the
+                * number of enumerated contiguous counters *or* the counter is
+                * explicitly enumerated in the supported counters mask.
+                */
+               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+               /*
+                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+                * unsupported, i.e. doesn't #GP and reads back '0'.
+                */
+               const uint64_t expected_val = expect_success ? test_val : 0;
+               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+                                      msr != MSR_P6_PERFCTR1;
+               uint32_t rdpmc_idx;
+               uint8_t vector;
+               uint64_t val;
+
+               vector = wrmsr_safe(msr, test_val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+               vector = rdmsr_safe(msr, &val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+               /* On #GP, the result of RDMSR is undefined. */
+               if (!expect_gp)
+                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+               /*
+                * Redo the read tests with RDPMC, which has different indexing
+                * semantics and additional capabilities.
+                */
+               rdpmc_idx = i;
+               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+                       rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+               /*
+                * KVM doesn't support non-architectural PMUs, i.e. it should be
+                * impossible to have fast mode RDPMC.  Verify that attempting
+                * to use fast RDPMC always #GPs.
+                */
+               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+               rdpmc_idx |= INTEL_RDPMC_FAST;
+               guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+               vector = wrmsr_safe(msr, 0);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+       }
+}
+
+static void guest_test_gp_counters(void)
+{
+       uint8_t pmu_version = guest_get_pmu_version();
+       uint8_t nr_gp_counters = 0;
+       uint32_t base_msr;
+
+       if (pmu_version)
+               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+       /*
+        * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+        * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+        * of GP counters.  If there are no GP counters, require KVM to leave
+        * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
+        * follow the spirit of the architecture and only globally enable GP
+        * counters, of which there are none.
+        */
+       if (pmu_version > 1) {
+               uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+               if (nr_gp_counters)
+                       GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+               else
+                       GUEST_ASSERT_EQ(global_ctrl, 0);
+       }
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_msr = MSR_IA32_PMC0;
+       else
+               base_msr = MSR_IA32_PERFCTR0;
+
+       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+       GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t nr_gp_counters)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+                               nr_gp_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+       uint64_t supported_bitmask = 0;
+       uint8_t nr_fixed_counters = 0;
+       uint8_t i;
+
+       /* Fixed counters require Architectural vPMU Version 2+. */
+       if (guest_get_pmu_version() >= 2)
+               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+       /*
+        * The supported bitmask for fixed counters was introduced in PMU
+        * version 5.
+        */
+       if (guest_get_pmu_version() >= 5)
+               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+                            nr_fixed_counters, supported_bitmask);
+
+       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+               uint8_t vector;
+               uint64_t val;
+
+               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+                       continue;
+               }
+
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+               GUEST_ASSERT_NE(val, 0);
+       }
+       GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                               uint8_t nr_fixed_counters,
+                               uint32_t supported_bitmask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+                               supported_bitmask);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+                               nr_fixed_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       unsigned int i;
+       uint8_t v, j;
+       uint32_t k;
+
+       const uint64_t perf_caps[] = {
+               0,
+               PMU_CAP_FW_WRITES,
+       };
+
+       /*
+        * Test up to PMU v5, which is the current maximum version defined by
+        * Intel, i.e. the last version that is guaranteed to be backwards
+        * compatible with KVM's existing behavior.
+        */
+       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+       /*
+        * Detect the existence of events that aren't supported by selftests.
+        * This will (obviously) fail any time the kernel adds support for a
+        * new event, but it's worth paying that price to keep the test fresh.
+        */
+       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+       /*
+        * Force iterating over known arch events regardless of whether or not
+        * KVM/hardware supports a given event.
+        */
+       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+       for (v = 0; v <= max_pmu_version; v++) {
+               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+                       if (!kvm_has_perf_caps && perf_caps[i])
+                               continue;
+
+                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       /*
+                        * To keep the total runtime reasonable, test every
+                        * possible non-zero, non-reserved bitmap combination
+                        * only with the native PMU version and the full bit
+                        * vector length.
+                        */
+                       if (v == pmu_version) {
+                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
+                       }
+                       /*
+                        * Test single bits for all PMU versions and lengths up
+                        * to the number of events + 1 (to verify KVM doesn't do
+                        * weird things if the guest length is greater than the
+                        * host length).  Explicitly test a mask of '0' and all
+                        * ones, i.e. all events being available and unavailable.
+                        */
+                       for (j = 0; j <= nr_arch_events + 1; j++) {
+                               test_arch_events(v, perf_caps[i], j, 0);
+                               test_arch_events(v, perf_caps[i], j, 0xff);
+
+                               for (k = 0; k < nr_arch_events; k++)
+                                       test_arch_events(v, perf_caps[i], j, BIT(k));
+                       }
+
+                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_gp_counters; j++)
+                               test_gp_counters(v, perf_caps[i], j);
+
+                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_fixed_counters; j++) {
+                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+                                       test_fixed_counters(v, perf_caps[i], j, k);
+                       }
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+
+       test_intel_counters();
+
+       return 0;
+}
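
For readers following the PERF_GLOBAL_CTRL reset check in guest_test_gp_counters() above, the expected value is simply the low nr_gp_counters bits set. Below is a minimal, illustrative sketch of that arithmetic; it is not part of the patch, and expected_global_ctrl() is a hypothetical helper mirroring GENMASK_ULL(nr_gp_counters - 1, 0).

/* Standalone sketch: expected post-RESET PERF_GLOBAL_CTRL for a v2+ vPMU. */
#include <stdint.h>
#include <stdio.h>

static uint64_t expected_global_ctrl(unsigned int nr_gp_counters)
{
	/* Bits [nr_gp_counters-1:0] set; zero GP counters means no bits set. */
	return nr_gp_counters ? ((1ull << nr_gp_counters) - 1) : 0;
}

int main(void)
{
	/* 4 counters -> 0xf, 8 counters -> 0xff, 0 counters -> 0x0. */
	printf("0x%llx 0x%llx 0x%llx\n",
	       (unsigned long long)expected_global_ctrl(4),
	       (unsigned long long)expected_global_ctrl(8),
	       (unsigned long long)expected_global_ctrl(0));
	return 0;
}

Compiled standalone, this prints "0xf 0xff 0x0", matching the values the GUEST_ASSERT_EQ() checks in the test expect for those counter counts.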
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
new file mode 100644 (file)
index 0000000..c15513c
--- /dev/null
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS                10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
+struct __kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u32 fixed_counter_bitmap;
+       __u32 flags;
+       __u32 pad[4];
+       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * Branch Instructions Retired for Zen CPUs.  Note, AMD and Intel use the
+ * same encoding for Instructions Retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+       .nevents = ARRAY_SIZE(base_event_filter.events),
+       .events = {
+               INTEL_ARCH_CPU_CYCLES,
+               INTEL_ARCH_INSTRUCTIONS_RETIRED,
+               INTEL_ARCH_REFERENCE_CYCLES,
+               INTEL_ARCH_LLC_REFERENCES,
+               INTEL_ARCH_LLC_MISSES,
+               INTEL_ARCH_BRANCHES_RETIRED,
+               INTEL_ARCH_BRANCHES_MISPREDICTED,
+               INTEL_ARCH_TOPDOWN_SLOTS,
+               AMD_ZEN_BRANCHES_RETIRED,
+       },
+};
+
+struct {
+       uint64_t loads;
+       uint64_t stores;
+       uint64_t loads_stores;
+       uint64_t branches_retired;
+       uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(-EIO) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+       uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+       wrmsr(msr, v);
+       if (rdmsr(msr) != v)
+               GUEST_SYNC(-EIO);
+
+       v ^= bits_to_flip;
+       wrmsr(msr, v);
+       if (rdmsr(msr) != v)
+               GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+       const uint64_t branches_retired = rdmsr(msr_base + 0);
+       const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+       pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+       pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+       check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+       check_msr(MSR_P6_EVNTSEL0, 0xffff);
+       check_msr(MSR_IA32_PMC0, 0xffff);
+       GUEST_SYNC(0);
+
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+               wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+               run_and_measure_loop(MSR_IA32_PMC0);
+               GUEST_SYNC(0);
+       }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+       check_msr(MSR_K7_EVNTSEL0, 0xffff);
+       check_msr(MSR_K7_PERFCTR0, 0xffff);
+       GUEST_SYNC(0);
+
+       for (;;) {
+               wrmsr(MSR_K7_EVNTSEL0, 0);
+               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+               run_and_measure_loop(MSR_K7_PERFCTR0);
+               GUEST_SYNC(0);
+       }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       get_ucall(vcpu, &uc);
+       TEST_ASSERT(uc.cmd == UCALL_SYNC,
+                   "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+       return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+       uint64_t r;
+
+       memset(&pmc_results, 0, sizeof(pmc_results));
+       sync_global_to_guest(vcpu->vm, pmc_results);
+
+       r = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+       sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+       uint64_t r;
+
+       vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+       r = run_vcpu_to_sync(vcpu);
+       vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+       return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+       bool found = false;
+       int i;
+
+       for (i = 0; i < f->nevents; i++) {
+               if (found)
+                       f->events[i - 1] = f->events[i];
+               else
+                       found = f->events[i] == event;
+       }
+       if (found)
+               f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
+do {                                                                                   \
+       uint64_t br = pmc_results.branches_retired;                                     \
+       uint64_t ir = pmc_results.instructions_retired;                                 \
+                                                                                       \
+       if (br && br != NUM_BRANCHES)                                                   \
+               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
+                       __func__, br, NUM_BRANCHES);                                    \
+       TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \
+                   __func__, br);                                                      \
+       TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",                \
+                   __func__, ir);                                                      \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()                                         \
+do {                                                                                   \
+       uint64_t br = pmc_results.branches_retired;                                     \
+       uint64_t ir = pmc_results.instructions_retired;                                 \
+                                                                                       \
+       TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",          \
+                   __func__, br);                                                      \
+       TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",                 \
+                   __func__, ir);                                                      \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+       run_vcpu_and_sync_pmc_results(vcpu);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+                            struct __kvm_pmu_event_filter *__f)
+{
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
+       vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+       run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = KVM_PMU_EVENT_DENY,
+               .nevents = 1,
+               .events = {
+                       RAW_EVENT(0x1C2, 0),
+               },
+       };
+
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_DENY;
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_DENY;
+
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
+
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+       test_with_filter(vcpu, &f);
+
+       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+       struct kvm_vcpu *vcpu;
+       int r;
+       struct kvm_vm *vm;
+
+       r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+       if (!(r & KVM_PMU_CAP_DISABLE))
+               return;
+
+       vm = vm_create(1);
+
+       vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+       vcpu = vm_vcpu_add(vm, 0, guest_code);
+       TEST_ASSERT(!sanity_check_pmu(vcpu),
+                   "Guest should not be able to use disabled PMU.");
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+       return host_cpu_is_intel &&
+              kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+              kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+              kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+/*
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
+ * 0xc2,0 for Branch Instructions Retired.
+ */
+static bool use_amd_pmu(void)
+{
+       return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * are supported on Intel Xeon processors:
+ *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED               0xD0
+#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
+
+static bool supports_event_mem_inst_retired(void)
+{
+       uint32_t eax, ebx, ecx, edx;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       if (x86_family(eax) == 0x6) {
+               switch (x86_model(eax)) {
+               /* Sapphire Rapids */
+               case 0x8F:
+               /* Ice Lake */
+               case 0x6A:
+               /* Skylake */
+               /* Cascade Lake */
+               case 0x55:
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH            0x29
+#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+       /*
+        * The actual values of the counters don't determine the outcome of
+        * the test, only whether they are zero or non-zero.
+        */
+       const uint64_t loads = rdmsr(msr_base + 0);
+       const uint64_t stores = rdmsr(msr_base + 1);
+       const uint64_t loads_stores = rdmsr(msr_base + 2);
+       int val;
+
+       __asm__ __volatile__("movl $0, %[v];"
+                            "movl %[v], %%eax;"
+                            "incl %[v];"
+                            : [v]"+m"(val) :: "eax");
+
+       pmc_results.loads = rdmsr(msr_base + 0) - loads;
+       pmc_results.stores = rdmsr(msr_base + 1) - stores;
+       pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+               wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+               wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+               masked_events_guest_test(MSR_IA32_PMC0);
+               GUEST_SYNC(0);
+       }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+       for (;;) {
+               wrmsr(MSR_K7_EVNTSEL0, 0);
+               wrmsr(MSR_K7_EVNTSEL1, 0);
+               wrmsr(MSR_K7_EVNTSEL2, 0);
+
+               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+               wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+               masked_events_guest_test(MSR_K7_PERFCTR0);
+               GUEST_SYNC(0);
+       }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+                                  const uint64_t masked_events[],
+                                  const int nmasked_events)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = nmasked_events,
+               .action = KVM_PMU_EVENT_ALLOW,
+               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+       };
+
+       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+       test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS            BIT(0)
+#define ALLOW_STORES           BIT(1)
+#define ALLOW_LOADS_STORES     BIT(2)
+
+struct masked_events_test {
+       uint64_t intel_events[MAX_TEST_EVENTS];
+       uint64_t intel_event_end;
+       uint64_t amd_events[MAX_TEST_EVENTS];
+       uint64_t amd_event_end;
+       const char *msg;
+       uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores).  The filter is then set in KVM with the masked events
+ * provided.  The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+       {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+               },
+               .msg = "Only allow loads.",
+               .flags = ALLOW_LOADS,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+               },
+               .msg = "Only allow stores.",
+               .flags = ALLOW_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+               },
+               .msg = "Only allow loads + stores.",
+               .flags = ALLOW_LOADS_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+               },
+               .msg = "Only allow loads and stores.",
+               .flags = ALLOW_LOADS | ALLOW_STORES,
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+               },
+               .msg = "Only allow loads and loads + stores.",
+               .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+               },
+               .msg = "Only allow stores and loads + stores.",
+               .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+       }, {
+               .intel_events = {
+                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+               },
+               .amd_events = {
+                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+               },
+               .msg = "Only allow loads, stores, and loads + stores.",
+               .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+       },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+                             uint64_t *events, int nevents)
+{
+       const uint64_t *evts;
+       int i;
+
+       evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+       for (i = 0; i < MAX_TEST_EVENTS; i++) {
+               if (evts[i] == 0)
+                       break;
+
+               events[nevents + i] = evts[i];
+       }
+
+       return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+       return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+                                   int nevents)
+{
+       int ntests = ARRAY_SIZE(test_cases);
+       int i, n;
+
+       for (i = 0; i < ntests; i++) {
+               const struct masked_events_test *test = &test_cases[i];
+
+               /* Do any test case events overflow MAX_TEST_EVENTS? */
+               assert(test->intel_event_end == 0);
+               assert(test->amd_event_end == 0);
+
+               n = append_test_events(test, events, nevents);
+
+               run_masked_events_test(vcpu, events, n);
+
+               TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+                           bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+                           bool_eq(pmc_results.loads_stores,
+                                   test->flags & ALLOW_LOADS_STORES),
+                           "%s  loads: %lu, stores: %lu, loads + stores: %lu",
+                           test->msg, pmc_results.loads, pmc_results.stores,
+                           pmc_results.loads_stores);
+       }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+       int i;
+
+       for (i = 0; i < nevents; i++) {
+               int event_select = i % 0xFF;
+               bool exclude = ((i % 4) == 0);
+
+               if (event_select == MEM_INST_RETIRED ||
+                   event_select == LS_DISPATCH)
+                       event_select++;
+
+               events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+                                                       0, exclude);
+       }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+       /* Run the test cases against a sparse PMU event filter. */
+       run_masked_events_tests(vcpu, events, 0);
+
+       /* Run the test cases against a dense PMU event filter. */
+       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+       run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+                               struct __kvm_pmu_event_filter *__f)
+{
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
+       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+                                      uint32_t flags, uint32_t action)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = 1,
+               .flags = flags,
+               .action = action,
+               .events = {
+                       event,
+               },
+       };
+
+       return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct __kvm_pmu_event_filter f;
+       uint64_t e = ~0ul;
+       int r;
+
+       /*
+        * Unfortunately, setting invalid bits (bits other than eventsel+umask)
+        * in the event data is expected to succeed when flags == 0.
+        */
+       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+       e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
+       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       f = base_event_filter;
+       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+       f = base_event_filter;
+       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+       f = base_event_filter;
+       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+       f = base_event_filter;
+       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+               /* Only OS_EN bit is enabled for fixed counter[idx]. */
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+       }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+                                              uint32_t action, uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = action,
+               .fixed_counter_bitmap = bitmap,
+       };
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+                                                  uint32_t action,
+                                                  uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = action;
+       f.fixed_counter_bitmap = bitmap;
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+                                       uint8_t nr_fixed_counters)
+{
+       unsigned int i;
+       uint32_t bitmap;
+       uint64_t count;
+
+       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+                   "Invalid nr_fixed_counters");
+
+       /*
+        * Check that the fixed performance counter can count normally when
+        * KVM userspace doesn't set any PMU filter.
+        */
+       count = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+       for (i = 0; i < BIT(nr_fixed_counters); i++) {
+               bitmap = BIT(i);
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+               /*
+                * Check that fixed_counter_bitmap has higher priority than
+                * events[] when both are set.
+                */
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_ALLOW,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_DENY,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+       }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint8_t idx;
+
+       /*
+        * Check that pmu_event_filter works as expected when it's applied to
+        * fixed performance counters.
+        */
+       for (idx = 0; idx < nr_fixed_counters; idx++) {
+               vm = vm_create_with_one_vcpu(&vcpu,
+                                            intel_run_fixed_counter_guest_code);
+               vcpu_args_set(vcpu, 1, idx);
+               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+               kvm_vm_free(vm);
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       void (*guest_code)(void);
+       struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+       TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+       guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+       if (use_amd_pmu())
+               test_amd_deny_list(vcpu);
+
+       test_without_filter(vcpu);
+       test_member_deny_list(vcpu);
+       test_member_allow_list(vcpu);
+       test_not_member_deny_list(vcpu);
+       test_not_member_allow_list(vcpu);
+
+       if (use_intel_pmu() &&
+           supports_event_mem_inst_retired() &&
+           kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+               vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+       else if (use_amd_pmu())
+               vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+       if (vcpu2)
+               test_masked_events(vcpu2);
+       test_filter_ioctl(vcpu);
+
+       kvm_vm_free(vm);
+
+       test_pmu_config_disable(guest_code);
+       test_fixed_counter_bitmap();
+
+       return 0;
+}
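
The masked-events test cases above rely on the match rule KVM documents for KVM_SET_PMU_EVENT_FILTER masked entries: a unit mask matches an entry when (unit_mask & mask) == match, and an event is allowed only if it matches at least one include entry and no exclude entry. The following standalone sketch of that rule is illustrative only and is based on the documented semantics; is_unit_mask_allowed() is a hypothetical helper, not part of the patch.

/*
 * Illustrative sketch of the masked-events match rule (assumed from KVM's
 * documented semantics, not taken verbatim from the patch above).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct masked_entry {
	uint8_t mask;
	uint8_t match;
	bool    exclude;
};

static bool is_unit_mask_allowed(const struct masked_entry *entries, int n,
				 uint8_t unit_mask)
{
	bool included = false;
	int i;

	for (i = 0; i < n; i++) {
		if ((unit_mask & entries[i].mask) != entries[i].match)
			continue;
		if (entries[i].exclude)
			return false;	/* a matching exclude entry denies the event */
		included = true;
	}
	return included;
}

int main(void)
{
	/* "Only allow loads and stores": include 0x7C/0x00, exclude 0xFF/0x83. */
	const struct masked_entry f[] = {
		{ .mask = 0x7C, .match = 0x00, .exclude = false },
		{ .mask = 0xFF, .match = 0x83, .exclude = true  },
	};

	/* Loads (0x81) and stores (0x82) pass; loads + stores (0x83) is filtered. */
	printf("%d %d %d\n",
	       is_unit_mask_allowed(f, 2, 0x81),
	       is_unit_mask_allowed(f, 2, 0x82),
	       is_unit_mask_allowed(f, 2, 0x83));
	return 0;
}

Run against the "Only allow loads and stores." filter from the test cases, this prints "1 1 0": loads and stores count while loads + stores is filtered, which is exactly what the ALLOW_LOADS | ALLOW_STORES flags for that case encode.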
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
new file mode 100644 (file)
index 0000000..82a8d88
--- /dev/null
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT         10
+#define BASE_DATA_GPA          ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE      ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern,  size)                                                          \
+do {                                                                                           \
+       uint8_t *mem = (uint8_t *)gpa;                                                          \
+       size_t i;                                                                               \
+                                                                                               \
+       for (i = 0; i < size; i++)                                                              \
+               __GUEST_ASSERT(mem[i] == pattern,                                               \
+                              "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",       \
+                              pattern, i, gpa + i, mem[i]);                                    \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+       size_t i;
+
+       for (i = 0; i < size; i++)
+               TEST_ASSERT(mem[i] == pattern,
+                           "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+                           pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute a KVM hypercall to map/unmap a gpa range, which causes a userspace
+ * exit to back/unback private memory. Subsequent guest accesses to the gpa
+ * range will not cause an exit to userspace.
+ *
+ * Test memory conversion scenarios with the following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ *   are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ *   userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ *   userspace is again not able to access converted private regions.
+ */
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+       SYNC_SHARED,
+       SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+                             uint8_t current_pattern, uint8_t new_pattern)
+{
+       GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+       GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED         BIT(1)
+#define MAP_GPA_DO_FALLOCATE   BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+                         bool do_fallocate)
+{
+       uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+       if (map_shared)
+               flags |= MAP_GPA_SHARED;
+       if (do_fallocate)
+               flags |= MAP_GPA_DO_FALLOCATE;
+       kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+       guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+       guest_map_mem(gpa, size, false, do_fallocate);
+}
+
+struct {
+       uint64_t offset;
+       uint64_t size;
+} static const test_ranges[] = {
+       GUEST_STAGE(0, PAGE_SIZE),
+       GUEST_STAGE(0, SZ_2M),
+       GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+       GUEST_STAGE(PAGE_SIZE, SZ_2M),
+       GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+       const uint8_t def_p = 0xaa;
+       const uint8_t init_p = 0xcc;
+       uint64_t j;
+       int i;
+
+       /* Memory should be shared by default. */
+       memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+       memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+       guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+               uint64_t gpa = base_gpa + test_ranges[i].offset;
+               uint64_t size = test_ranges[i].size;
+               uint8_t p1 = 0x11;
+               uint8_t p2 = 0x22;
+               uint8_t p3 = 0x33;
+               uint8_t p4 = 0x44;
+
+               /*
+                * Set the test region to pattern one to differentiate it from
+                * the data range as a whole (contains the initial pattern).
+                */
+               memset((void *)gpa, p1, size);
+
+               /*
+                * Convert to private, set and verify the private data, and
+                * then verify that the rest of the data (mapped shared) still
+                * holds the initial pattern, and that the host always sees the
+                * shared memory (initial pattern).  Unlike shared memory,
+                * punching a hole in private memory is destructive, i.e.
+                * previous values aren't guaranteed to be preserved.
+                */
+               guest_map_private(gpa, size, do_fallocate);
+
+               if (size > PAGE_SIZE) {
+                       memset((void *)gpa, p2, PAGE_SIZE);
+                       goto skip;
+               }
+
+               memset((void *)gpa, p2, size);
+               guest_sync_private(gpa, size, p1);
+
+               /*
+                * Verify that the private memory was set to pattern two, and
+                * that shared memory still holds the initial pattern.
+                */
+               memcmp_g(gpa, p2, size);
+               if (gpa > base_gpa)
+                       memcmp_g(base_gpa, init_p, gpa - base_gpa);
+               if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+                       memcmp_g(gpa + size, init_p,
+                                (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+               /*
+                * Convert odd-number page frames back to shared to verify KVM
+                * also correctly handles holes in private ranges.
+                */
+               for (j = 0; j < size; j += PAGE_SIZE) {
+                       if ((j >> PAGE_SHIFT) & 1) {
+                               guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+                               guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+                               memcmp_g(gpa + j, p3, PAGE_SIZE);
+                       } else {
+                               guest_sync_private(gpa + j, PAGE_SIZE, p1);
+                       }
+               }
+
+skip:
+               /*
+                * Convert the entire region back to shared and explicitly write
+                * pattern three to fill in the even-number frames before
+                * asking the host to verify (and write pattern four).
+                */
+               guest_map_shared(gpa, size, do_fallocate);
+               memset((void *)gpa, p3, size);
+               guest_sync_shared(gpa, size, p3, p4);
+               memcmp_g(gpa, p4, size);
+
+               /* Reset the shared memory back to the initial pattern. */
+               memset((void *)gpa, init_p, size);
+
+               /*
+                * Free (via PUNCH_HOLE) *all* private memory so that the next
+                * iteration starts from a clean slate, e.g. with respect to
+                * whether or not there are pages/folios in guest_mem.
+                */
+               guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+       }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+       /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+       uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+       kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+       const uint8_t init_p = 0xcc;
+       int i;
+
+        * Convert the entire range to private; this testcase is all about
+        * Convert the entire range to private, this testcase is all about
+        * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+        */
+       guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+               uint64_t gpa = base_gpa + test_ranges[i].offset;
+               uint64_t size = test_ranges[i].size;
+
+               /*
+                * Free all memory before each iteration, even for the !precise
+                * case where the memory will be faulted back in.  Freeing and
+                * reallocating should obviously work, and freeing all memory
+                * minimizes the probability of cross-testcase influence.
+                */
+               guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+               /* Fault-in and initialize memory, and verify the pattern. */
+               if (precise) {
+                       memset((void *)gpa, init_p, size);
+                       memcmp_g(gpa, init_p, size);
+               } else {
+                       memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+                       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+               }
+
+               /*
+                * Punch a hole at the target range and verify that reads from
+                * the guest succeed and return zeroes.
+                */
+               guest_punch_hole(gpa, size);
+               memcmp_g(gpa, 0, size);
+       }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+       /*
+        * Run the conversion test twice, with and without doing fallocate() on
+        * the guest_memfd backing when converting between shared and private.
+        */
+       guest_test_explicit_conversion(base_gpa, false);
+       guest_test_explicit_conversion(base_gpa, true);
+
+       /*
+        * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+        * faulted in, once with only the target range faulted in.
+        */
+       guest_test_punch_hole(base_gpa, false);
+       guest_test_punch_hole(base_gpa, true);
+       GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint64_t gpa = run->hypercall.args[0];
+       uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+       bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+       bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+       bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+       struct kvm_vm *vm = vcpu->vm;
+
+       TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+                   "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+                   KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+       if (do_fallocate)
+               vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+       if (set_attributes)
+               vm_set_memory_attributes(vm, gpa, size,
+                                        map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+       run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+       struct kvm_vcpu *vcpu = __vcpu;
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vm *vm = vcpu->vm;
+       struct ucall uc;
+
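+       /* Spin until the main thread signals all vCPUs to start running. */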
+       while (!READ_ONCE(run_vcpus))
+               ;
+
+       for ( ;; ) {
+               vcpu_run(vcpu);
+
+               if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+                       handle_exit_hypercall(vcpu);
+                       continue;
+               }
+
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               case UCALL_SYNC: {
+                       uint64_t gpa  = uc.args[1];
+                       size_t size = uc.args[2];
+                       size_t i;
+
+                       TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+                                   uc.args[0] == SYNC_PRIVATE,
+                                   "Unknown sync command '%ld'", uc.args[0]);
+
+                       for (i = 0; i < size; i += vm->page_size) {
+                               size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+                               uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+                               /* In all cases, the host should observe the shared data. */
+                               memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+                               /* For shared, write the new pattern to guest memory. */
+                               if (uc.args[0] == SYNC_SHARED)
+                                       memset(hva, uc.args[4], nr_bytes);
+                       }
+                       break;
+               }
+               case UCALL_DONE:
+                       return NULL;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+                                uint32_t nr_memslots)
+{
+       /*
+        * Allocate enough memory so that each vCPU's chunk of memory can be
+        * naturally aligned with respect to the size of the backing store.
+        */
+       const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+       const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+       const size_t memfd_size = per_cpu_size * nr_vcpus;
+       const size_t slot_size = memfd_size / nr_memslots;
+       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       pthread_t threads[KVM_MAX_VCPUS];
+       struct kvm_vm *vm;
+       int memfd, i, r;
+
+       const struct vm_shape shape = {
+               .mode = VM_MODE_DEFAULT,
+               .type = KVM_X86_SW_PROTECTED_VM,
+       };
+
+       TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+                   "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+                   memfd_size, nr_memslots);
+       vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+       vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+       memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
+       for (i = 0; i < nr_memslots; i++)
+               vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+                          BASE_DATA_SLOT + i, slot_size / vm->page_size,
+                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+       for (i = 0; i < nr_vcpus; i++) {
+               uint64_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
+
+               vcpu_args_set(vcpus[i], 1, gpa);
+
+               /*
+                * Map only what is needed so that an out-of-bounds access
+                * results in a #PF => SHUTDOWN instead of data corruption.
+                */
+               virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+               pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+       }
+
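+       /* Release all vCPU worker threads at (roughly) the same time. */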
+       WRITE_ONCE(run_vcpus, true);
+
+       for (i = 0; i < nr_vcpus; i++)
+               pthread_join(threads[i], NULL);
+
+       kvm_vm_free(vm);
+
+       /*
+        * Allocate and free memory from the guest_memfd after closing the VM
+        * fd.  The guest_memfd is gifted a reference to its owning VM, i.e. it
+        * should prevent the VM from being fully destroyed until the last
+        * reference to the guest_memfd is also put.
+        */
+       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+       puts("");
+       printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+       puts("");
+       backing_src_help("-s");
+       puts("");
+       puts(" -n: specify the number of vcpus (default: 1)");
+       puts("");
+       puts(" -m: specify the number of memslots (default: 1)");
+       puts("");
+}
+
+int main(int argc, char *argv[])
+{
+       enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+       uint32_t nr_memslots = 1;
+       uint32_t nr_vcpus = 1;
+       int opt;
+
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+       while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+               switch (opt) {
+               case 's':
+                       src_type = parse_backing_src_type(optarg);
+                       break;
+               case 'n':
+                       nr_vcpus = atoi_positive("nr_vcpus", optarg);
+                       break;
+               case 'm':
+                       nr_memslots = atoi_positive("nr_memslots", optarg);
+                       break;
+               case 'h':
+               default:
+                       usage(argv[0]);
+                       exit(0);
+               }
+       }
+
+       test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
new file mode 100644 (file)
index 0000000..13e72fc
--- /dev/null
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+       volatile uint64_t value;
+
+       while (true)
+               value = *((uint64_t *) EXITS_TEST_GVA);
+
+       return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+       int r;
+
+       r = _vcpu_run(vcpu);
+       if (r) {
+               TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+               TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+       }
+       return vcpu->run->exit_reason;
+}
+
+const struct vm_shape protected_vm_shape = {
+       .mode = VM_MODE_DEFAULT,
+       .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       pthread_t vm_thread;
+       void *thread_return;
+       uint32_t exit_reason;
+
+       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+                                          guest_repeatedly_read);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
+                                   EXITS_TEST_NPAGES,
+                                   KVM_MEM_GUEST_MEMFD);
+
+       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+       /* Request to access page privately */
+       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
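+       /*
+        * Run the vCPU on a separate thread so the memslot can be deleted
+        * while the guest faults on private memory; the exit reason is passed
+        * back via the thread's return value.
+        */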
+       pthread_create(&vm_thread, NULL,
+                      (void *(*)(void *))run_vcpu_get_exit_reason,
+                      (void *)vcpu);
+
+       vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+       pthread_join(vm_thread, &thread_return);
+       exit_reason = (uint32_t)(uint64_t)thread_return;
+
+       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+       kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint32_t exit_reason;
+
+       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+                                          guest_repeatedly_read);
+
+       /* Add a non-private memslot (flags = 0) */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
+                                   EXITS_TEST_NPAGES, 0);
+
+       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+       /* Request to access page privately */
+       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+       exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+       test_private_access_memslot_deleted();
+       test_private_access_memslot_not_private();
+}
diff --git a/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
new file mode 100644 (file)
index 0000000..cbc92a8
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT                5       /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC   (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID   0xff
+
+static void *race(void *arg)
+{
+       struct kvm_lapic_state lapic = {};
+       struct kvm_vcpu *vcpu = arg;
+
+       while (1) {
+               /* Trigger kvm_recalculate_apic_map(). */
+               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       struct kvm_vcpu *vcpuN;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       time_t t;
+       int i;
+
+       kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+       /*
+        * Create the max number of vCPUs supported by selftests so that KVM
+        * has a decent amount of work to do when recalculating the map, i.e. to
+        * make the problematic window large enough to hit.
+        */
+       vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+       /*
+        * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+        * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+        */
+       for (i = 0; i < KVM_MAX_VCPUS; i++)
+               vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
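+       /*
+        * Repeatedly toggle x2APIC on the last vCPU to trigger APIC map
+        * recalculations that race with the KVM_SET_LAPIC thread.
+        */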
+       vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
new file mode 100644 (file)
index 0000000..4991378
--- /dev/null
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+       GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+       GUEST_DONE();
+}
+
+static void test_set_invalid_bsp(struct kvm_vm *vm)
+{
+       unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+       int r;
+
+       if (max_vcpu_id) {
+               r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
+               TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
+       }
+
+       r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
+       TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+       int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+                          (void *)(unsigned long)vcpu->id);
+
+       TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+       int stage;
+
+       for (stage = 0; stage < 2; stage++) {
+
+               vcpu_run(vcpu);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                                       uc.args[1] == stage + 1,
+                                       "Stage %d: Unexpected register values vmexit, got %lx",
+                                       stage + 1, (ulong)uc.args[1]);
+                       test_set_bsp_busy(vcpu, "while running vm");
+                       break;
+               case UCALL_DONE:
+                       TEST_ASSERT(stage == 1,
+                                       "Expected GUEST_DONE in stage 2, got stage %d",
+                                       stage);
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_ASSERT(false, "Unexpected exit: %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+                               struct kvm_vcpu *vcpus[])
+{
+       struct kvm_vm *vm;
+       uint32_t i;
+
+       vm = vm_create(nr_vcpus);
+
+       test_set_invalid_bsp(vm);
+
+       vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+       for (i = 0; i < nr_vcpus; i++)
+               vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+                                                                guest_not_bsp_vcpu);
+       return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+       struct kvm_vcpu *vcpus[2];
+       struct kvm_vm *vm;
+
+       vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+       run_vcpu(vcpus[0]);
+       run_vcpu(vcpus[1]);
+
+       kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+       struct kvm_vcpu *vcpus[2];
+       struct kvm_vm *vm;
+
+       vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+       test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+       run_vcpu(vcpus[0]);
+       run_vcpu(vcpus[1]);
+
+       test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+       run_vm_bsp(0);
+       run_vm_bsp(1);
+       run_vm_bsp(0);
+
+       check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
new file mode 100644 (file)
index 0000000..c021c07
--- /dev/null
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit)                               \
+do {                                                                           \
+       struct kvm_sregs new;                                                   \
+       int rc;                                                                 \
+                                                                               \
+       /* Skip the sub-test if the feature/bit is supported. */               \
+       if (orig.cr & bit)                                                      \
+               break;                                                          \
+                                                                               \
+       memcpy(&new, &orig, sizeof(sregs));                                     \
+       new.cr |= bit;                                                          \
+                                                                               \
+       rc = _vcpu_sregs_set(vcpu, &new);                                       \
+       TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit);        \
+                                                                               \
+       /* Sanity check that KVM didn't change anything. */                     \
+       vcpu_sregs_get(vcpu, &new);                                             \
+       TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");   \
+} while (0)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+       uint64_t cr4;
+
+       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+       if (kvm_cpu_has(X86_FEATURE_UMIP))
+               cr4 |= X86_CR4_UMIP;
+       if (kvm_cpu_has(X86_FEATURE_LA57))
+               cr4 |= X86_CR4_LA57;
+       if (kvm_cpu_has(X86_FEATURE_VMX))
+               cr4 |= X86_CR4_VMXE;
+       if (kvm_cpu_has(X86_FEATURE_SMX))
+               cr4 |= X86_CR4_SMXE;
+       if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+               cr4 |= X86_CR4_FSGSBASE;
+       if (kvm_cpu_has(X86_FEATURE_PCID))
+               cr4 |= X86_CR4_PCIDE;
+       if (kvm_cpu_has(X86_FEATURE_XSAVE))
+               cr4 |= X86_CR4_OSXSAVE;
+       if (kvm_cpu_has(X86_FEATURE_SMEP))
+               cr4 |= X86_CR4_SMEP;
+       if (kvm_cpu_has(X86_FEATURE_SMAP))
+               cr4 |= X86_CR4_SMAP;
+       if (kvm_cpu_has(X86_FEATURE_PKU))
+               cr4 |= X86_CR4_PKE;
+
+       return cr4;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t cr4;
+       int rc, i;
+
+       /*
+        * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+        * use it to verify all supported CR4 bits can be set prior to defining
+        * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+        */
+       vm = vm_create_barebones();
+       vcpu = __vm_vcpu_add(vm, 0);
+
+       vcpu_sregs_get(vcpu, &sregs);
+
+       sregs.cr0 = 0;
+       sregs.cr4 |= calc_supported_cr4_feature_bits();
+       cr4 = sregs.cr4;
+
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+                   sregs.cr4, cr4);
+
+       /* Verify all unsupported features are rejected by KVM. */
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
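+       /* Bits 63:32 of CR0 are reserved and should be rejected by KVM. */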
+       for (i = 32; i < 64; i++)
+               TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+       /* NW without CD is illegal, as is PG without PE. */
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
+       kvm_vm_free(vm);
+
+       /* Create a "real" VM and verify APIC_BASE can be set. */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.apic_base = 1 << 10;
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+                   sregs.apic_base);
+       sregs.apic_base = 1 << 11;
+       rc = _vcpu_sregs_set(vcpu, &sregs);
+       TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+                   sregs.apic_base);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
new file mode 100644 (file)
index 0000000..3fb967f
--- /dev/null
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types.  Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+       struct kvm_sev_cmd cmd = {
+               .id = cmd_id,
+               .data = (uint64_t)data,
+               .sev_fd = open_sev_dev_path_or_exit(),
+       };
+       int ret;
+
+       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
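+       /* A successful ioctl (ret >= 0) must not report a firmware error. */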
+       TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+                   "%d failed: fw error: %d\n",
+                   cmd_id, cmd.error);
+
+       return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones_type(vm_type);
+       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+       TEST_ASSERT(ret == 0,
+                   "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+                   ret, errno);
+       kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+       struct kvm_vm *vm;
+       int ret;
+
+       vm = vm_create_barebones_type(vm_type);
+       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "KVM_SEV_INIT2 should fail, %s.",
+                   msg);
+       kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+       test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+       /*
+        * TODO: check that unsupported types cannot be created.  Probably
+        * a separate selftest.
+        */
+       if (have_sev_es)
+               test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+       test_init2_invalid(0, &(struct kvm_sev_init){},
+                          "VM type is KVM_X86_DEFAULT_VM");
+       if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+               test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+                                  "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+       int i;
+
+       for (i = 0; i < 32; i++)
+               test_init2_invalid(vm_type,
+                       &(struct kvm_sev_init){ .flags = BIT(i) },
+                       "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+       int i;
+
+       for (i = 0; i < 64; i++) {
+               if (!(supported_features & BIT_ULL(i)))
+                       test_init2_invalid(vm_type,
+                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+                               "unknown feature");
+               else if (KNOWN_FEATURES & BIT_ULL(i))
+                       test_init2(vm_type,
+                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       int kvm_fd = open_kvm_dev_path_or_exit();
+       bool have_sev;
+
+       TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+                                          KVM_X86_SEV_VMSA_FEATURES) == 0);
+       kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+                           KVM_X86_SEV_VMSA_FEATURES,
+                           &supported_vmsa_features);
+
+       have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+       TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+                   "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+       TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+                   "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+       test_vm_types();
+
+       test_flags(KVM_X86_SEV_VM);
+       if (have_sev_es)
+               test_flags(KVM_X86_SEV_ES_VM);
+
+       test_features(KVM_X86_SEV_VM, 0);
+       if (have_sev_es)
+               test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
new file mode 100644 (file)
index 0000000..0a6dfba
--- /dev/null
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_barebones();
+       if (!es)
+               sev_vm_init(vm);
+       else
+               sev_es_vm_init(vm);
+
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(vm, i);
+
+       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
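+       /* SEV-ES also requires encrypting the VMSA via LAUNCH_UPDATE_VMSA. */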
+       if (es)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+       return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_barebones();
+       if (!with_vcpus)
+               return vm;
+
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(vm, i);
+
+       return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       int ret;
+
+       ret = __sev_migrate_from(dst, src);
+       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+       struct kvm_vm *src_vm;
+       struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+       int i, ret;
+
+       src_vm = sev_vm_create(es);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               dst_vms[i] = aux_vm_create(true);
+
+       /* Initial migration from the src to the first dst. */
+       sev_migrate_from(dst_vms[0], src_vm);
+
+       for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+               sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+       /* Migrate the guest back to the original VM. */
+       ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+       TEST_ASSERT(ret == -1 && errno == EIO,
+                   "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+                   errno);
+
+       kvm_vm_free(src_vm);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+       struct kvm_vm *vm;
+       struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+       int i, j;
+       struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+       for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+               j = i % NR_LOCK_TESTING_THREADS;
+               __sev_migrate_from(input->vm, input->source_vms[j]);
+       }
+
+       return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+       struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+       pthread_t pt[NR_LOCK_TESTING_THREADS];
+       int i;
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+               input[i].vm = sev_vm_create(/* es= */ false);
+               input[0].source_vms[i] = input[i].vm;
+       }
+       for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+               memcpy(input[i].source_vms, input[0].source_vms,
+                      sizeof(input[i].source_vms));
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_join(pt[i], NULL);
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+               *sev_es_vm_no_vmsa;
+       int ret;
+
+       vm_no_vcpu = vm_create_barebones();
+       vm_no_sev = aux_vm_create(true);
+       ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Migrations require SEV enabled. ret %d, errno: %d", ret,
+                   errno);
+
+       if (!have_sev_es)
+               goto out;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       sev_es_vm_no_vmsa = vm_create_barebones();
+       sev_es_vm_init(sev_es_vm_no_vmsa);
+       __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+       ret = __sev_migrate_from(sev_vm, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to migrate to SEV enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(sev_es_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require the same number of vCPUs. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+               ret, errno);
+
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(sev_es_vm);
+       kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+       kvm_vm_free(vm_no_vcpu);
+       kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+       int ret;
+
+       ret = __sev_mirror_create(dst, src);
+       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+       int cmd_id;
+
+       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+               int ret;
+
+               /*
+                * These commands are allowed for mirror VMs; all others are
+                * not.
+                */
+               switch (cmd_id) {
+               case KVM_SEV_LAUNCH_UPDATE_VMSA:
+               case KVM_SEV_GUEST_STATUS:
+               case KVM_SEV_DBG_DECRYPT:
+               case KVM_SEV_DBG_ENCRYPT:
+                       continue;
+               default:
+                       break;
+               }
+
+               /*
+                * These commands should be disallowed before the data
+                * parameter is examined so NULL is OK here.
+                */
+               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+               TEST_ASSERT(
+                       ret == -1 && errno == EINVAL,
+                       "Should not be able to call command: %d. ret: %d, errno: %d",
+                       cmd_id, ret, errno);
+       }
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+       struct kvm_vm *src_vm, *dst_vm;
+       int i;
+
+       src_vm = sev_vm_create(es);
+       dst_vm = aux_vm_create(false);
+
+       sev_mirror_create(dst_vm, src_vm);
+
+       /* Check that we can complete creation of the mirror VM.  */
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               __vm_vcpu_add(dst_vm, i);
+
+       if (es)
+               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       verify_mirror_allowed_cmds(dst_vm);
+
+       kvm_vm_free(src_vm);
+       kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+       int ret;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       vm_with_vcpu = aux_vm_create(true);
+       vm_no_vcpu = aux_vm_create(false);
+
+       ret = __sev_mirror_create(sev_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to self. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+                   errno);
+
+       ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
+               ret, errno);
+
+       if (!have_sev_es)
+               goto out;
+
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       ret = __sev_mirror_create(sev_vm, sev_es_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       ret = __sev_mirror_create(sev_es_vm, sev_vm);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+               ret, errno);
+
+       kvm_vm_free(sev_es_vm);
+
+out:
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(vm_with_vcpu);
+       kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+       struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+                     *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       dst_vm = aux_vm_create(true);
+       dst2_vm = aux_vm_create(true);
+       dst3_vm = aux_vm_create(true);
+       mirror_vm = aux_vm_create(false);
+       dst_mirror_vm = aux_vm_create(false);
+       dst2_mirror_vm = aux_vm_create(false);
+       dst3_mirror_vm = aux_vm_create(false);
+
+       sev_mirror_create(mirror_vm, sev_vm);
+
+       sev_migrate_from(dst_mirror_vm, mirror_vm);
+       sev_migrate_from(dst_vm, sev_vm);
+
+       sev_migrate_from(dst2_vm, dst_vm);
+       sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+       sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+       sev_migrate_from(dst3_vm, dst2_vm);
+
+       kvm_vm_free(dst_vm);
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(dst2_vm);
+       kvm_vm_free(dst3_vm);
+       kvm_vm_free(mirror_vm);
+       kvm_vm_free(dst_mirror_vm);
+       kvm_vm_free(dst2_mirror_vm);
+       kvm_vm_free(dst3_mirror_vm);
+
+       /*
+        * Run a similar test, but destroy the mirrors before the mirrored VMs
+        * to ensure destruction is done safely.
+        */
+       sev_vm = sev_vm_create(/* es= */ false);
+       dst_vm = aux_vm_create(true);
+       mirror_vm = aux_vm_create(false);
+       dst_mirror_vm = aux_vm_create(false);
+
+       sev_mirror_create(mirror_vm, sev_vm);
+
+       sev_migrate_from(dst_mirror_vm, mirror_vm);
+       sev_migrate_from(dst_vm, sev_vm);
+
+       kvm_vm_free(mirror_vm);
+       kvm_vm_free(dst_mirror_vm);
+       kvm_vm_free(dst_vm);
+       kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+       if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+               test_sev_migrate_from(/* es= */ false);
+               if (have_sev_es)
+                       test_sev_migrate_from(/* es= */ true);
+               test_sev_migrate_locking();
+               test_sev_migrate_parameters();
+               if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+                       test_sev_move_copy();
+       }
+       if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+               test_sev_mirror(/* es= */ false);
+               if (have_sev_es)
+                       test_sev_mirror(/* es= */ true);
+               test_sev_mirror_parameters();
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
new file mode 100644 (file)
index 0000000..ae77698
--- /dev/null
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <math.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
+static void guest_sev_es_code(void)
+{
+       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+       /*
+        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
+        * force "termination" to signal "done" via the GHCB MSR protocol.
+        */
+       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+       __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+       GUEST_DONE();
+}
+
+/* Stash state passed via VMSA before any compiled code runs.  */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+    "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
+    "xor %edx, %edx\n"
+    "xsave (%rdi)\n"
+    "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+       int i;
+       bool bad = false;
+       for (i = 0; i < 4095; i++) {
+               if (from_host[i] != from_guest[i]) {
+                       printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+                       bad = true;
+               }
+       }
+
+       if (bad)
+               abort();
+}
+
+static void test_sync_vmsa(uint32_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t gva;
+       void *hva;
+
+       double x87val = M_PI;
+       struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+
+       vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+       gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+                                   MEM_REGION_TEST_DATA);
+       hva = addr_gva2hva(vm, gva);
+
+       vcpu_args_set(vcpu, 1, gva);
+
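+       /*
+        * Load a known x87/AVX state and snapshot it with XSAVE so it can be
+        * pushed into the vCPU and later compared against the guest's view.
+        */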
+       asm("fninit\n"
+           "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+           "fldl %3\n"
+           "xsave (%2)\n"
+           "fstp %%st\n"
+           : "=m"(xsave)
+           : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+           : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+       vcpu_xsave_set(vcpu, &xsave);
+
+       vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+
+       /* This page is shared, so make it decrypted.  */
+       memset(hva, 0, 4096);
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                   "Wanted SYSTEM_EVENT, got %s",
+                   exit_reason_str(vcpu->run->exit_reason));
+       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+       compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+       kvm_vm_free(vm);
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+
+       vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+       /* TODO: Validate the measurement is as expected. */
+       vm_sev_launch(vm, policy, NULL);
+
+       for (;;) {
+               vcpu_run(vcpu);
+
+               if (policy & SEV_POLICY_ES) {
+                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                                   "Wanted SYSTEM_EVENT, got %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+                       break;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       continue;
+               case UCALL_DONE:
+                       return;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected exit: %s",
+                                 exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+
+       kvm_vm_free(vm);
+}
+
+static void guest_shutdown_code(void)
+{
+       struct desc_ptr idt;
+
+       /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
+       memset(&idt, 0, sizeof(idt));
+       __asm__ __volatile__("lidt %0" :: "m"(idt));
+
+       __asm__ __volatile__("ud2");
+}
+
+static void test_sev_es_shutdown(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       uint32_t type = KVM_X86_SEV_ES_VM;
+
+       vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
+
+       vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
+                   "Wanted SHUTDOWN, got %s",
+                   exit_reason_str(vcpu->run->exit_reason));
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       const u64 xf_mask = XFEATURE_MASK_X87_AVX;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+       test_sev(guest_sev_code, 0);
+
+       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+               test_sev(guest_sev_es_code, SEV_POLICY_ES);
+
+               test_sev_es_shutdown();
+
+               if (kvm_has_cap(KVM_CAP_XCRS) &&
+                   (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+                       test_sync_vmsa(0);
+                       test_sync_vmsa(SEV_POLICY_NO_DBG);
+               }
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
new file mode 100644 (file)
index 0000000..fabeead
--- /dev/null
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT        10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+       uint64_t error_code;
+       uint64_t vector;
+
+       vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+       /*
+        * When TDP is enabled, flds will trigger an emulation failure, exit to
+        * userspace, and then the selftest host "VMM" skips the instruction.
+        *
+        * When TDP is disabled, no instruction emulation is required so flds
+        * should generate #PF(RSVD).
+        */
+       if (tdp_enabled) {
+               GUEST_ASSERT(!vector);
+       } else {
+               GUEST_ASSERT_EQ(vector, PF_VECTOR);
+               GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       uint64_t *pte;
+       uint64_t *hva;
+       uint64_t gpa;
+       int rc;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+       rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+       TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   MEM_REGION_GPA, MEM_REGION_SLOT,
+                                   MEM_REGION_SIZE / PAGE_SIZE, 0);
+       gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+                                MEM_REGION_GPA, MEM_REGION_SLOT);
+       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+       virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+       hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+       memset(hva, 0, PAGE_SIZE);
+
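+       /*
+        * Set a physical address bit above the guest's MAXPHYADDR in the PTE;
+        * the GPA is reserved from the guest's perspective, but legal for the
+        * host/hardware.
+        */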
+       pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+       *pte |= BIT_ULL(MAXPHYADDR);
+
+       vcpu_run(vcpu);
+
+       /*
+        * When TDP is enabled, KVM must emulate accesses to a guest physical
+        * address that is illegal from the guest's perspective but legal from
+        * hardware's perspective.  This should result in an emulation
+        * failure exit to userspace since KVM doesn't support emulating flds.
+        */
+       if (kvm_is_tdp_enabled()) {
+               handle_flds_emulation_failure_exit(vcpu);
+               vcpu_run(vcpu);
+       }
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
new file mode 100644 (file)
index 0000000..55c88d6
--- /dev/null
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+#include "svm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code, however, the SMI handler is
+ * executed in real-address mode.  To keep things simple, we limit ourselves
+ * to a mode-independent subset of asm here.
+ * The SMI handler always reports back the fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+       0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
+       0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
+       0x0f, 0xaa,           /* rsm */
+};
+
+static inline void sync_with_host(uint64_t phase)
+{
+       asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+                    : "+a" (phase));
+}
+
+static void self_smi(void)
+{
+       x2apic_write_reg(APIC_ICR,
+                        APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+static void l2_guest_code(void)
+{
+       sync_with_host(8);
+
+       sync_with_host(10);
+
+       vmcall();
+}
+
+static void guest_code(void *arg)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+       struct svm_test_data *svm = arg;
+       struct vmx_pages *vmx_pages = arg;
+
+       sync_with_host(1);
+
+       wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+       sync_with_host(2);
+
+       self_smi();
+
+       sync_with_host(4);
+
+       if (arg) {
+               if (this_cpu_has(X86_FEATURE_SVM)) {
+                       generic_svm_setup(svm, l2_guest_code,
+                                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+               } else {
+                       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+                       GUEST_ASSERT(load_vmcs(vmx_pages));
+                       prepare_vmcs(vmx_pages, l2_guest_code,
+                                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+               }
+
+               sync_with_host(5);
+
+               self_smi();
+
+               sync_with_host(7);
+
+               if (this_cpu_has(X86_FEATURE_SVM)) {
+                       run_guest(svm->vmcb, svm->vmcb_gpa);
+                       run_guest(svm->vmcb, svm->vmcb_gpa);
+               } else {
+                       vmlaunch();
+                       vmresume();
+               }
+
+               /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+               sync_with_host(12);
+       }
+
+       sync_with_host(DONE);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu_events events;
+
+       vcpu_events_get(vcpu, &events);
+
+       events.smi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+       vcpu_events_set(vcpu, &events);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t nested_gva = 0;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       int stage, stage_reported;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+                                   SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+       TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+                   == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+       memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
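+       /* The SMI handler entry point is SMBASE + 0x8000, i.e. SMRAM_GPA + 0x8000. */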
+       memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+              sizeof(smi_handler));
+
+       vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+
+       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+               if (kvm_cpu_has(X86_FEATURE_SVM))
+                       vcpu_alloc_svm(vm, &nested_gva);
+               else if (kvm_cpu_has(X86_FEATURE_VMX))
+                       vcpu_alloc_vmx(vm, &nested_gva);
+       }
+
+       if (!nested_gva)
+               pr_info("will skip the nested (VMX/SVM) portion of the SMM test\n");
+
+       vcpu_args_set(vcpu, 1, nested_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               memset(&regs, 0, sizeof(regs));
+               vcpu_regs_get(vcpu, &regs);
+
+               stage_reported = regs.rax & 0xff;
+
+               if (stage_reported == DONE)
+                       goto done;
+
+               TEST_ASSERT(stage_reported == stage ||
+                           stage_reported == SMRAM_STAGE,
+                           "Unexpected stage: #%x, got %x",
+                           stage, stage_reported);
+
+               /*
+                * Enter SMM during L2 execution and check that we correctly
+                * return from it. Do not perform save/restore while in SMM yet.
+                */
+               if (stage == 8) {
+                       inject_smi(vcpu);
+                       continue;
+               }
+
+               /*
+                * Perform save/restore while the guest is in SMM triggered
+                * during L2 execution.
+                */
+               if (stage == 10)
+                       inject_smi(vcpu);
+
+               state = vcpu_save_state(vcpu);
+               kvm_vm_release(vm);
+
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
new file mode 100644 (file)
index 0000000..141b7fc
--- /dev/null
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+void svm_l2_guest_code(void)
+{
+       GUEST_SYNC(4);
+       /* Exit to L1 */
+       vmcall();
+       GUEST_SYNC(6);
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       GUEST_ASSERT(svm->vmcb_gpa);
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, svm_l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(3);
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(5);
+       vmcb->save.rip += 3;
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+       GUEST_SYNC(6);
+
+       /* Exit to L1 */
+       vmcall();
+
+       /* L1 has now set up a shadow VMCS for us.  */
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+       GUEST_SYNC(10);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+       GUEST_SYNC(11);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+       GUEST_SYNC(12);
+
+       /* Done, exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_SYNC(3);
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       GUEST_SYNC(4);
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, vmx_l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(5);
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       /* Check that the launched state is preserved.  */
+       GUEST_ASSERT(vmlaunch());
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_SYNC(7);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+       vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+       GUEST_ASSERT(vmlaunch());
+       GUEST_SYNC(8);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+
+       vmwrite(GUEST_RIP, 0xc0ffee);
+       GUEST_SYNC(9);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+       GUEST_SYNC(13);
+       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(vmresume());
+}
+
+static void __attribute__((__flatten__)) guest_code(void *arg)
+{
+       GUEST_SYNC(1);
+
+       if (this_cpu_has(X86_FEATURE_XSAVE)) {
+               uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+               uint8_t buffer[4096];
+
+               memset(buffer, 0xcc, sizeof(buffer));
+
+               /*
+                * Modify state for all supported xfeatures to take them out of
+                * their "init" state, i.e. to make them show up in XSTATE_BV.
+                *
+                * Note off-by-default features, e.g. AMX, are out of scope for
+                * this particular testcase as they have a different ABI.
+                */
+               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+               asm volatile ("fincstp");
+
+               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+               asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+               if (supported_xcr0 & XFEATURE_MASK_YMM)
+                       asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+               if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+                       asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+                       asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+                       asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+               }
+
+               if (this_cpu_has(X86_FEATURE_MPX)) {
+                       uint64_t bounds[2] = { 10, 0xffffffffull };
+                       uint64_t output[2] = { };
+
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+                       /*
+                        * Don't bother trying to get BNDCSR into the INUSE
+                        * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
+                        * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+                        * modified by XRSTOR.  Stuffing XSTATE_BV in the host
+                        * is simpler than doing XRSTOR here in the guest.
+                        *
+                        * However, temporarily enable MPX in BNDCFGS so that
+                        * BNDMOV actually loads BND1.  If MPX isn't *fully*
+                        * enabled, all MPX instructions are treated as NOPs.
+                        *
+                        * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+                        * mnemonics/registers has been removed from gcc and
+                        * clang (and was never fully supported by clang).
+                        */
+                       wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+                       asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+                       /*
+                        * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+                        * that BND1 actually got loaded.
+                        */
+                       asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+                       wrmsr(MSR_IA32_BNDCFGS, 0);
+
+                       GUEST_ASSERT_EQ(bounds[0], output[0]);
+                       GUEST_ASSERT_EQ(bounds[1], output[1]);
+               }
+               if (this_cpu_has(X86_FEATURE_PKU)) {
+                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+                       set_cr4(get_cr4() | X86_CR4_PKE);
+                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+                       wrpkru(-1u);
+               }
+       }
+
+       GUEST_SYNC(2);
+
+       if (arg) {
+               if (this_cpu_has(X86_FEATURE_SVM))
+                       svm_l1_guest_code(arg);
+               else
+                       vmx_l1_guest_code(arg);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       uint64_t *xstate_bv, saved_xstate_bv;
+       vm_vaddr_t nested_gva = 0;
+       struct kvm_cpuid2 empty_cpuid = {};
+       struct kvm_regs regs1, regs2;
+       struct kvm_vcpu *vcpu, *vcpuN;
+       struct kvm_vm *vm;
+       struct kvm_x86_state *state;
+       struct ucall uc;
+       int stage;
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+               if (kvm_cpu_has(X86_FEATURE_SVM))
+                       vcpu_alloc_svm(vm, &nested_gva);
+               else if (kvm_cpu_has(X86_FEATURE_VMX))
+                       vcpu_alloc_vmx(vm, &nested_gva);
+       }
+
+       if (!nested_gva)
+               pr_info("will skip nested state checks\n");
+
+       vcpu_args_set(vcpu, 1, nested_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+
+               /*
+                * Restore XSAVE state in a dummy vCPU, first without doing
+                * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
+                * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+                * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
+                * load only XSAVE state, MSRs in particular have a much more
+                * convoluted ABI.
+                *
+                * Load two versions of XSAVE state: one with the actual guest
+                * XSAVE state, and one with all supported features forced "on"
+                * in xstate_bv, e.g. to ensure that KVM allows loading all
+                * supported features, even if something goes awry in saving
+                * the original snapshot.
+                */
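+               /* XSTATE_BV is the first u64 of the XSAVE header, at offset 512. */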
+               xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+               saved_xstate_bv = *xstate_bv;
+
+               vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+               vcpu_xsave_set(vcpuN, state->xsave);
+               *xstate_bv = kvm_cpu_supported_xcr0();
+               vcpu_xsave_set(vcpuN, state->xsave);
+
+               vcpu_init_cpuid(vcpuN, &empty_cpuid);
+               vcpu_xsave_set(vcpuN, state->xsave);
+               *xstate_bv = saved_xstate_bv;
+               vcpu_xsave_set(vcpuN, state->xsave);
+
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
new file mode 100644 (file)
index 0000000..916e042
--- /dev/null
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+       vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+       x2apic_write_reg(APIC_EOI, 0x00);
+       intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       /*
+        * This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC, and since
+        * L1 didn't enable virtual interrupt masking, L2 should receive it
+        * and not L1.
+        *
+        * L2 also has the virtual interrupt VINTR_IRQ_NUMBER pending in V_IRQ,
+        * so it should receive that one as well after the following 'sti'.
+        */
+       x2apic_write_reg(APIC_ICR,
+               APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       GUEST_ASSERT(vintr_irq_called);
+       GUEST_ASSERT(intr_irq_called);
+
+       __asm__ __volatile__(
+               "vmcall\n"
+       );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       x2apic_enable();
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* No virtual interrupt masking */
+       vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+       /* No intercepts for real and virtual interrupts */
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+       /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+       vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+       vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+       vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+       vcpu_args_set(vcpu, 1, svm_gva);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+               /* NOT REACHED */
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
new file mode 100644 (file)
index 0000000..00135cb
--- /dev/null
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
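+       /*
+        * Don't intercept SHUTDOWN in L1; the shutdown triggered in L2 then
+        * takes down the whole VM, which the host sees as KVM_EXIT_SHUTDOWN.
+        */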
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+       idt[6].p   = 0; // #UD is intercepted but its injection will cause #NP
+       idt[11].p  = 0; // #NP is not intercepted and will cause another
+                       // #NP that will be converted to #DF
+       idt[8].p   = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       /* should not reach here */
+       GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vcpu_alloc_svm(vm, &svm_gva);
+
+       vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
new file mode 100644 (file)
index 0000000..7b6481d
--- /dev/null
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ *   svm_int_ctl_test
+ *
+ *   Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR                 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+       bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+       int_fired++;
+       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+       GUEST_ASSERT_EQ(int_fired, 1);
+
+       /*
+        * Same as the vmmcall() function, but with a ud2 sneaked after the
+        * vmmcall.  The caller injects an exception with the return address
+        * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+        * use vmmcall() directly.
+        */
+       __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+                             : : "a"(0xdeadbeef), "c"(0xbeefdead)
+                             : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+                               "r10", "r11", "r12", "r13", "r14", "r15");
+
+       GUEST_ASSERT_EQ(bp_fired, 1);
+       hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+       nmi_stage_inc();
+
+       if (nmi_stage_get() == 1) {
+               vmmcall();
+               GUEST_FAIL("Unexpected resume after VMMCALL");
+       } else {
+               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+               GUEST_DONE();
+       }
+}
+
+static void l2_guest_code_nmi(void)
+{
+       ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       if (is_nmi)
+               x2apic_enable();
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm,
+                         is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+       vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+       if (is_nmi) {
+               vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+       } else {
+               vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+               /* The return address pushed on stack */
+               vmcb->control.next_rip = vmcb->save.rip;
+       }
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+                      vmcb->control.exit_code,
+                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+       if (is_nmi) {
+               clgi();
+               x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+               nmi_stage_inc();
+
+               stgi();
+               /* self-NMI happens here */
+               while (true)
+                       cpu_relax();
+       }
+
+       /* Skip over VMMCALL */
+       vmcb->save.rip += 3;
+
+       /* Switch to alternate IDT to cause intervening NPF again */
+       vmcb->save.idtr.base = idt_alt;
+       vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+       vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+       /* The return address pushed on stack, skip over UD2 */
+       vmcb->control.next_rip = vmcb->save.rip + 2;
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
+                      vmcb->control.exit_code,
+                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+       GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t svm_gva;
+       vm_vaddr_t idt_alt_vm;
+       struct kvm_guest_debug debug;
+
+       pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+       vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+       vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+
+       if (!is_nmi) {
+               void *idt, *idt_alt;
+
+               idt_alt_vm = vm_vaddr_alloc_page(vm);
+               idt_alt = addr_gva2hva(vm, idt_alt_vm);
+               idt = addr_gva2hva(vm, vm->arch.idt);
+               memcpy(idt_alt, idt, getpagesize());
+       } else {
+               idt_alt_vm = 0;
+       }
+       vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+       memset(&debug, 0, sizeof(debug));
+       vcpu_guest_debug_set(vcpu, &debug);
+
+       struct ucall uc;
+
+       alarm(2);
+       vcpu_run(vcpu);
+       alarm(0);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+               /* NOT REACHED */
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+       }
+done:
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+                   "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+       atomic_init(&nmi_stage, 0);
+
+       run_test(false);
+       run_test(true);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
new file mode 100644 (file)
index 0000000..8a62cca
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * Nested SVM testing: VMCALL
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       vm_vaddr_t svm_gva;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+       vcpu_args_set(vcpu, 1, svm_gva);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
new file mode 100644 (file)
index 0000000..8fa3948
--- /dev/null
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, and updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+struct ucall uc_none = {
+       .cmd = UCALL_NONE,
+};
+
+/*
+ * The ucall is embedded here to protect against the compiler reshuffling
+ * registers before calling a function.  In this test we only need to get a
+ * KVM_EXIT_IO vmexit and preserve RBX; no additional information is needed.
+ */
+void guest_code(void)
+{
+       asm volatile("1: in %[port], %%al\n"
+                    "add $0x1, %%rbx\n"
+                    "jmp 1b"
+                    : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+                    : "rax", "rbx");
+}
+
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+       TEST_ASSERT(left->reg == right->reg, \
+                   "Register " #reg \
+                   " values did not match: 0x%llx, 0x%llx", \
+                   left->reg, right->reg)
+       REG_COMPARE(rax);
+       REG_COMPARE(rbx);
+       REG_COMPARE(rcx);
+       REG_COMPARE(rdx);
+       REG_COMPARE(rsi);
+       REG_COMPARE(rdi);
+       REG_COMPARE(rsp);
+       REG_COMPARE(rbp);
+       REG_COMPARE(r8);
+       REG_COMPARE(r9);
+       REG_COMPARE(r10);
+       REG_COMPARE(r11);
+       REG_COMPARE(r12);
+       REG_COMPARE(r13);
+       REG_COMPARE(r14);
+       REG_COMPARE(r15);
+       REG_COMPARE(rip);
+       REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
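+       /* Not implemented; system register comparison is currently a no-op. */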
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+                               struct kvm_vcpu_events *right)
+{
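+       /* Not implemented; vCPU events comparison is currently a no-op. */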
+}
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.injected, 1);
+               WRITE_ONCE(events->exception.pending, 1);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events.  KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.nr, UD_VECTOR);
+               WRITE_ONCE(events->exception.pending, 1);
+               WRITE_ONCE(events->exception.nr, 255);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       __u64 *cr4 = &run->s.regs.sregs.cr4;
+       __u64 pae_enabled = *cr4;
+       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+               WRITE_ONCE(*cr4, pae_enabled);
+               asm volatile(".rept 512\n\t"
+                            "nop\n\t"
+                            ".endr");
+               WRITE_ONCE(*cr4, pae_disabled);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+       const time_t TIMEOUT = 2; /* seconds, roughly */
+       struct kvm_x86_state *state;
+       struct kvm_translation tr;
+       struct kvm_run *run;
+       pthread_t thread;
+       time_t t;
+
+       run = vcpu->run;
+
+       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       run->kvm_valid_regs = 0;
+
+       /* Save state *before* spawning the thread that mucks with vCPU state. */
+       state = vcpu_save_state(vcpu);
+
+       /*
+        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+        * should already be set in guest state.
+        */
+       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+                   (run->s.regs.sregs.efer & EFER_LME),
+                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+                   !!(run->s.regs.sregs.efer & EFER_LME));
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               /*
+                * Reload known good state if the vCPU triple faults, e.g. due
+                * to the unhandled #GPs being injected.  VMX preserves state
+                * on shutdown, but SVM synthesizes an INIT as the VMCB state
+                * is architecturally undefined on triple fault.
+                */
+               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+                       vcpu_load_state(vcpu, state);
+
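+               /*
+                * For the CR4.PAE race, also issue KVM_TRANSLATE to force a
+                * guest page table walk while sregs are being toggled.
+                */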
+               if (racer == race_sregs_cr4) {
+                       tr = (struct kvm_translation) { .linear_address = 0 };
+                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+               }
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request reading invalid register set from VCPU. */
+       run->kvm_valid_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+
+       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
+
+       /* Request setting invalid register set into VCPU. */
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+
+       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+       rv = _vcpu_run(vcpu);
+       TEST_ASSERT(rv < 0 && errno == EINVAL,
+                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
+                   rv);
+       run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Request and verify all valid register sets. */
+       /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs.sregs);
+
+       vcpu_events_get(vcpu, &events);
+       compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Set and verify various register values. */
+       run->s.regs.regs.rbx = 0xBAD1DEA;
+       run->s.regs.sregs.apic_base = 1 << 11;
+       /* TODO run->s.regs.events.XYZ = ABC; */
+
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+                   "apic_base sync regs value incorrect 0x%llx.",
+                   run->s.regs.sregs.apic_base);
+
+       vcpu_regs_get(vcpu, &regs);
+       compare_regs(&regs, &run->s.regs.regs);
+
+       vcpu_sregs_get(vcpu, &sregs);
+       compare_sregs(&sregs, &run->s.regs.sregs);
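+       /*
+        * Enable and link the shadow VMCS so that L2's VMREAD/VMWRITE
+        * (exercised in vmx_l2_guest_code) operate on the shadow VMCS.
+        */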
+
+       vcpu_events_get(vcpu, &events);
+       compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+
+       /* Clear kvm_dirty_regs bits, verify new s.regs values are
+        * overwritten with existing guest values.
+        */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.regs.rbx = 0xDEADBEEF;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
+        * Verify s.regs values are not overwritten with existing guest values
+        * and that guest values are not overwritten with kvm_sync_regs values.
+        */
+       run->kvm_valid_regs = 0;
+       run->kvm_dirty_regs = 0;
+       run->s.regs.regs.rbx = 0xAAAA;
+       vcpu_regs_get(vcpu, &regs);
+       regs.rbx = 0xBAC0;
+       vcpu_regs_set(vcpu, &regs);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
+                   "rbx guest value incorrect 0x%llx.",
+                   regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+        * with existing guest values but that guest values are overwritten
+        * with kvm_sync_regs values.
+        */
+       run->kvm_valid_regs = 0;
+       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+       run->s.regs.regs.rbx = 0xBBBB;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
+                   "rbx sync regs value incorrect 0x%llx.",
+                   run->s.regs.regs.rbx);
+       vcpu_regs_get(vcpu, &regs);
+       TEST_ASSERT(regs.rbx == 0xBBBB + 1,
+                   "rbx guest value incorrect 0x%llx.",
+                   regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+       race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+       race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+       race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+       int cap;
+
+       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
new file mode 100644 (file)
index 0000000..56306a1
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT      0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+
+       prepare_vmcs(vmx, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_ASSERT(!vmlaunch());
+       /* L2 should triple fault after a triple fault event is injected. */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+       GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+       struct vmcb *vmcb = svm->vmcb;
+
+       generic_svm_setup(svm, l2_guest_code,
+                       &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* Don't intercept shutdown, to test the case where SVM allows L1 to leave it unintercepted */
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+       run_guest(vmcb, svm->vmcb_gpa);
+
+       /* should not reach here, L1 should crash  */
+       GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vcpu_events events;
+       struct ucall uc;
+
+       bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+       bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+       TEST_REQUIRE(has_vmx || has_svm);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+       if (has_vmx) {
+               vm_vaddr_t vmx_pages_gva;
+
+               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+               vcpu_alloc_vmx(vm, &vmx_pages_gva);
+               vcpu_args_set(vcpu, 1, vmx_pages_gva);
+       } else {
+               vm_vaddr_t svm_gva;
+
+               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+               vcpu_alloc_svm(vm, &svm_gva);
+               vcpu_args_set(vcpu, 1, svm_gva);
+       }
+
+       vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+       run = vcpu->run;
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+                   "Expected IN from port %d from L2, got port %d",
+                   ARBITRARY_IO_PORT, run->io.port);
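+
+       /*
+        * Make a triple fault event pending, then use immediate_exit to
+        * complete the in-flight port I/O without running any more guest
+        * code before the event is checked and delivered.
+        */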
+       vcpu_events_get(vcpu, &events);
+       events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+       events.triple_fault.pending = true;
+       vcpu_events_set(vcpu, &events);
+       run->immediate_exit = true;
+       vcpu_run_complete_io(vcpu);
+
+       vcpu_events_get(vcpu, &events);
+       TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+                   "Triple fault event invalid");
+       TEST_ASSERT(events.triple_fault.pending,
+                   "No triple fault pending");
+       vcpu_run(vcpu);
+
+       if (has_svm) {
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+       } else {
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
new file mode 100644 (file)
index 0000000..12b0964
--- /dev/null
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
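+/*
+ * Values are rounded to the nearest UNITY (2^30) so that the small amount of
+ * TSC progress between reads does not cause spurious mismatches.
+ */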
+#define UNITY                  (1ull << 30)
+#define HOST_ADJUST            (UNITY * 64)
+#define GUEST_STEP             (UNITY * 4)
+#define ROUND(x)               ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x)       ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vcpu, x))
+
+static void guest_code(void)
+{
+       u64 val = 0;
+
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+       val = 1ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+       GUEST_SYNC(2);
+       val = 2ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC_ADJUST, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Host: setting the TSC offset.  */
+       GUEST_SYNC(3);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+        * host-side offset and affect both MSRs.
+        */
+       GUEST_SYNC(4);
+       val = 3ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC_ADJUST, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+        * offset is now visible in MSR_IA32_TSC_ADJUST.
+        */
+       GUEST_SYNC(5);
+       val = 4ull * GUEST_STEP;
+       wrmsr(MSR_IA32_TSC, val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+       GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               if (!strcmp((const char *)uc.args[0], "hello") &&
+                   uc.args[1] == stage + 1)
+                       ksft_test_result_pass("stage %d passed\n", stage + 1);
+               else
+                       ksft_test_result_fail(
+                               "stage %d: Unexpected register values vmexit, got %lx\n",
+                               stage + 1, (ulong)uc.args[1]);
+               return;
+       case UCALL_DONE:
+               ksft_test_result_pass("stage %d passed\n", stage + 1);
+               return;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_ASSERT(false, "Unexpected exit: %s",
+                           exit_reason_str(vcpu->run->exit_reason));
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       uint64_t val;
+
+       ksft_print_header();
+       ksft_set_plan(5);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       val = 0;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
+       run_vcpu(vcpu, 1);
+       val = 1ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
+       run_vcpu(vcpu, 2);
+       val = 2ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Host: writes to MSR_IA32_TSC set the host-side offset
+        * and therefore do not change MSR_IA32_TSC_ADJUST.
+        */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       run_vcpu(vcpu, 3);
+
+       /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+       /* Restore previous value.  */
+       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+        * host-side offset and affect both MSRs.
+        */
+       run_vcpu(vcpu, 4);
+       val = 3ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+       /*
+        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+        * offset is now visible in MSR_IA32_TSC_ADJUST.
+        */
+       run_vcpu(vcpu, 5);
+       val = 4ull * GUEST_STEP;
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+       kvm_vm_free(vm);
+
+       ksft_finished();        /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
new file mode 100644 (file)
index 0000000..59c7304
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ    2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
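+
+/*
+ * Each vCPU publishes its latest TSC reading through tsc_sync; observing a
+ * value that went backwards relative to the last published value means the
+ * vCPUs' TSCs are not in sync.
+ */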
+static void guest_code(void)
+{
+       uint64_t start_tsc, local_tsc, tmp;
+
+       start_tsc = rdtsc();
+       do {
+               tmp = READ_ONCE(tsc_sync);
+               local_tsc = rdtsc();
+               WRITE_ONCE(tsc_sync, local_tsc);
+               if (unlikely(local_tsc < tmp))
+                       GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+       } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+       GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+       unsigned long vcpu_id = (unsigned long)_cpu_nr;
+       unsigned long failures = 0;
+       static bool first_cpu_done;
+       struct kvm_vcpu *vcpu;
+
+       /* The kernel is fine, but vm_vcpu_add() needs locking */
+       pthread_spin_lock(&create_lock);
+
+       vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+       if (!first_cpu_done) {
+               first_cpu_done = true;
+               vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+       }
+
+       pthread_spin_unlock(&create_lock);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_DONE:
+                       goto out;
+
+               case UCALL_SYNC:
+                       printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+                              uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+                       failures++;
+                       break;
+
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+out:
+       return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+       vm = vm_create(NR_TEST_VCPUS);
+       vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+       pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+       pthread_t cpu_threads[NR_TEST_VCPUS];
+       unsigned long cpu;
+       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+               pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+       unsigned long failures = 0;
+       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+               void *this_cpu_failures;
+               pthread_join(cpu_threads[cpu], &this_cpu_failures);
+               failures += (unsigned long)this_cpu_failures;
+       }
+
+       TEST_ASSERT(!failures, "TSC sync failed");
+       pthread_spin_destroy(&create_lock);
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
new file mode 100644 (file)
index 0000000..57f157c
--- /dev/null
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors into the guest.
+ *
+ * The test starts one vCPU with MCG_CMCI_P enabled.  It verifies that
+ * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that UCNA errors get recorded in the Machine Check
+ * bank registers regardless of whether the error-signaling interrupts are
+ * delivered to the guest.
+ */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR  0xa9
+
+#define UCNA_BANK  0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record states about the injected UCNA.
+ * The variables started with the 'i_' prefixes are recorded in interrupt
+ * handler. Variables without the 'i_' prefixes are recorded in guest main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+       struct kvm_vcpu *vcpu;
+       uint64_t *p_i_ucna_rcvd;
+       uint64_t *p_i_ucna_addr;
+       uint64_t *p_ucna_addr;
+       uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+       uint64_t base = GET_APIC_BASE(msr);
+
+       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+       uint64_t ctl2;
+       verify_apic_base_addr();
+       xapic_enable();
+
+       /* Set up the interrupt vector and enable per-bank CMCI signaling. */
+       xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+       /* Enables interrupt in guest. */
+       asm volatile("sti");
+
+       /* Let user space inject the first UCNA */
+       GUEST_SYNC(SYNC_FIRST_UCNA);
+
+       ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+       /* Disables the per-bank CMCI signaling. */
+       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+       /* Let the user space inject the second UCNA */
+       GUEST_SYNC(SYNC_SECOND_UCNA);
+
+       ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+       GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+       GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+       GUEST_DONE();
+}
+
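+/* CMCI handler: count the interrupt and record the reported UCNA address. */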
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+       i_ucna_rcvd++;
+       i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+       xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+       printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
+{
+       /*
+        * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+        * the IA32_MCi_STATUS register.
+        * MSCOD=1 (BIT[16] - MscodDataRdErr).
+        * MCACOD=0x0090 (Memory controller error format, channel 0)
+        */
+       uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+                         MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+       struct kvm_x86_mce mce = {};
+       mce.status = status;
+       mce.mcg_status = 0;
+       /*
+        * MCM_ADDR_PHYS indicates the reported address is a physical address.
+        * The lowest 6 bits hold the recoverable address LSB, i.e. the
+        * injected MCE is at 4KB granularity.
+        */
+       mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+       mce.addr = addr;
+       mce.bank = UCNA_BANK;
+
+       vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+       struct thread_params *params = (struct thread_params *)arg;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(r == 0,
+                   "pthread_setcanceltype failed with errno=%d",
+                   r);
+
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+       printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+       inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+                   "Expect UCALL_SYNC");
+       TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+       printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+       inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+       vcpu_run(params->vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+       if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+               TEST_ASSERT(false, "vCPU assertion failure: %s.",
+                           (const char *)uc.args[0]);
+       }
+
+       return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       params->vcpu = vcpu;
+       params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+       params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+       params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+       params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+       run_ucna_injection(params);
+
+       TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only the first UCNA should get signaled.");
+       TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+                   "Only the first UCNA's address should be recorded via the interrupt.");
+       TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+                   "The first injected UCNA should be exposed via the bank registers.");
+       TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+                   "The second injected UCNA should be exposed via the bank registers.");
+
+       printf("Test successful.\n"
+              "UCNA CMCI interrupts received: %ld\n"
+              "Last UCNA address received via CMCI: %lx\n"
+              "First UCNA address in vCPU thread: %lx\n"
+              "Second UCNA address in vCPU thread: %lx\n",
+              *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+              *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
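+/*
+ * Program the vCPU's MCG capabilities via KVM_X86_SETUP_MCE, optionally
+ * advertising CMCI (MCG_CMCI_P) support.
+ */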
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+       uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+       if (enable_cmci_p)
+               mcg_caps |= MCG_CMCI_P;
+
+       mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+       vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+                                                bool enable_cmci_p, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+       setup_mce_cap(vcpu, enable_cmci_p);
+       return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+       struct thread_params params;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *ucna_vcpu;
+       struct kvm_vcpu *cmcidis_vcpu;
+       struct kvm_vcpu *cmci_vcpu;
+
+       kvm_check_cap(KVM_CAP_MCE);
+
+       vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+       kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+                 &supported_mcg_caps);
+
+       if (!(supported_mcg_caps & MCG_CMCI_P)) {
+               print_skip("MCG_CMCI_P is not supported");
+               exit(KSFT_SKIP);
+       }
+
+       ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+       cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+       cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+       vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       test_ucna_injection(ucna_vcpu, &params);
+       run_vcpu_expect_gp(cmcidis_vcpu);
+       run_vcpu_expect_gp(cmci_vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
new file mode 100644 (file)
index 0000000..9481cbc
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
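+/*
+ * Execute "rep insb" from port 0x80 into the buffer.  The expected final
+ * buffer pointer accounts for main() rewriting RCX on the userspace I/O
+ * exit: a count of 2 is shrunk to 1, and a count of 3 is grown to 8192.
+ */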
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+       unsigned long end;
+
+       if (count == 2)
+               end = (unsigned long)buffer + 1;
+       else
+               end = (unsigned long)buffer + 8192;
+
+       asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+       GUEST_ASSERT_EQ(count, 0);
+       GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+       uint8_t buffer[8192];
+       int i;
+
+       /*
+        * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
+        * test that KVM doesn't explode when userspace modifies the "count" on
+        * a userspace I/O exit.  KVM isn't required to play nice with the I/O
+        * itself as KVM doesn't support manipulating the count, it just needs
+        * to not explode or overflow a buffer.
+        */
+       guest_ins_port80(buffer, 2);
+       guest_ins_port80(buffer, 3);
+
+       /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+       memset(buffer, 0, sizeof(buffer));
+       guest_ins_port80(buffer, 8192);
+       for (i = 0; i < 8192; i++)
+               __GUEST_ASSERT(buffer[i] == 0xaa,
+                              "Expected '0xaa', got '0x%x' at buffer[%u]",
+                              buffer[i], i);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_regs regs;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       memset(&regs, 0, sizeof(regs));
+
+       while (1) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (get_ucall(vcpu, &uc))
+                       break;
+
+               TEST_ASSERT(run->io.port == 0x80,
+                           "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+               /*
+                * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+                * Note, this abuses KVM's batching of rep string I/O to avoid
+                * getting stuck in an infinite loop.  That behavior isn't in
+                * scope from a testing perspective as it's not ABI in any way,
+                * i.e. it really is abusing internal KVM knowledge.
+                */
+               vcpu_regs_get(vcpu, &regs);
+               if (regs.rcx == 2)
+                       regs.rcx = 1;
+               if (regs.rcx == 3)
+                       regs.rcx = 8192;
+               memset((void *)run + run->io.data_offset, 0xaa, 4096);
+               vcpu_regs_set(vcpu, &regs);
+       }
+
+       switch (uc.cmd) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
new file mode 100644 (file)
index 0000000..32b2794
--- /dev/null
@@ -0,0 +1,769 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+#include <sys/ioctl.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel knows about. */
+                       .base = MSR_IA32_XSS,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel doesn't know about. */
+                       .base = MSR_IA32_FLUSH_CMD,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test a fabricated MSR that no one knows about. */
+                       .base = MSR_NON_EXISTENT,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_fs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_FS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_gs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_GS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
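+/* Clear the MSR's bit in the range's bitmap so accesses to it are denied. */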
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+       bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000_write,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+                       .base = 0x40000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_40000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000_read,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+                       .base = 0xdeadbeef,
+                       .nmsrs = 1,
+                       .bitmap = bitmap_deadbeef,
+               },
+       },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+       uint64_t data;
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+        *
+        * A GP is thrown if anything other than 0 is written to
+        * MSR_IA32_XSS.
+        */
+       data = test_rdmsr(MSR_IA32_XSS);
+       GUEST_ASSERT(data == 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 1);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+        *
+        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+        * from or if a value other than 1 is written to it.
+        */
+       test_rdmsr(MSR_IA32_FLUSH_CMD);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+        *
+        * Test that a fabricated MSR can pass through the kernel
+        * and be handled in userspace.
+        */
+       test_wrmsr(MSR_NON_EXISTENT, 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       data = test_rdmsr(MSR_NON_EXISTENT);
+       GUEST_ASSERT(data == 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       if (is_forced_emulation_enabled) {
+               /* Let userspace know we aren't done. */
+               GUEST_SYNC(0);
+
+               /*
+                * Now run the same tests with the instruction emulator.
+                */
+               data = test_em_rdmsr(MSR_IA32_XSS);
+               GUEST_ASSERT(data == 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 1);
+               GUEST_ASSERT(guest_exception_count == 1);
+
+               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+               GUEST_ASSERT(guest_exception_count == 0);
+
+               test_em_wrmsr(MSR_NON_EXISTENT, 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+               data = test_em_rdmsr(MSR_NON_EXISTENT);
+               GUEST_ASSERT(data == 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+       /* This goes into the in-kernel emulation */
+       wrmsr(MSR_SYSCALL_MASK, 0);
+
+       if (trapped) {
+               /* This goes into user space emulation */
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+       } else {
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+       }
+
+       /* If trapped == true, this goes into user space emulation */
+       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+       /* This goes into the in-kernel emulation */
+       rdmsr(MSR_IA32_POWER_CTL);
+
+       /* Invalid MSR, should always be handled by user space exit */
+       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+       wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+       guest_msr_calls(true);
+
+       /*
+        * Disable MSR filtering so that the kernel handles everything in
+        * the next round.
+        */
+       GUEST_SYNC(0);
+
+       guest_msr_calls(false);
+
+       GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+       uint64_t data;
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data == MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data != MSR_GS_BASE);
+
+       /* Let userspace know to switch the filter */
+       GUEST_SYNC(0);
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data != MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data == MSR_GS_BASE);
+
+       GUEST_DONE();
+}
+
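+/*
+ * #GP handler: skip the faulting RDMSR/WRMSR by advancing RIP past it
+ * (zeroing the RDMSR outputs), and count the exception.
+ */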
+static void __guest_gp_handler(struct ex_regs *regs,
+                              char *r_start, char *r_end,
+                              char *w_start, char *w_end)
+{
+       if (regs->rip == (uintptr_t)r_start) {
+               regs->rip = (uintptr_t)r_end;
+               regs->rax = 0;
+               regs->rdx = 0;
+       } else if (regs->rip == (uintptr_t)w_start) {
+               regs->rip = (uintptr_t)w_end;
+       } else {
+               GUEST_ASSERT(!"RIP is at an unknown location!");
+       }
+
+       ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+                          &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+                          &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+           get_ucall(vcpu, &uc) == UCALL_ABORT) {
+               REPORT_GUEST_ASSERT(uc);
+       }
+}
+
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu->run;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               run->msr.data = 0;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               run->msr.data = msr_non_existent_data;
+               break;
+       case MSR_FS_BASE:
+               run->msr.data = MSR_FS_BASE;
+               break;
+       case MSR_GS_BASE:
+               run->msr.data = MSR_GS_BASE;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu->run;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               if (run->msr.data != 0)
+                       run->msr.error = 1;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               if (run->msr.data != 1)
+                       run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               msr_non_existent_data = run->msr.data;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+                   uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc = {};
+
+       check_for_guest_assert(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+               break;
+       case UCALL_ABORT:
+               check_for_guest_assert(vcpu);
+               break;
+       case UCALL_DONE:
+               process_ucall_done(vcpu);
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected ucall");
+       }
+
+       return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+                                        uint32_t msr_index)
+{
+       vcpu_run(vcpu);
+       process_rdmsr(vcpu, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+                                        uint32_t msr_index)
+{
+       vcpu_run(vcpu);
+       process_wrmsr(vcpu, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+       vcpu_run(vcpu);
+       return process_ucall(vcpu);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+       vcpu_run(vcpu);
+       process_ucall_done(vcpu);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t cmd;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+       /* Process guest code userspace exits. */
+       run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+       run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+       run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+       run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+       vcpu_run(vcpu);
+       cmd = process_ucall(vcpu);
+
+       if (is_forced_emulation_enabled) {
+               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+               vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+               /* Process emulated rdmsr and wrmsr instructions. */
+               run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+               run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+               run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+               run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+               /* Confirm the guest completed without issues. */
+               run_guest_then_process_ucall_done(vcpu);
+       } else {
+               TEST_ASSERT_EQ(cmd, UCALL_DONE);
+               printf("To run the instruction emulation tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
+       }
+}
+
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_SYNC:
+               vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+               break;
+       case UCALL_DONE:
+               return 1;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+       run->msr.data = run->msr.index;
+       msr_reads++;
+
+       if (run->msr.index == MSR_SYSCALL_MASK ||
+           run->msr.index == MSR_GS_BASE) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR read trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "MSR deadbeef read trap w/o inval fault");
+       }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+       /* ignore */
+       msr_writes++;
+
+       if (run->msr.index == MSR_IA32_POWER_CTL) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for MSR_IA32_POWER_CTL incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR_IA32_POWER_CTL trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for deadbeef incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "deadbeef trap w/o inval fault");
+       }
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       struct kvm_run *run = vcpu->run;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+                                                     KVM_MSR_EXIT_REASON_UNKNOWN |
+                                                     KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       prepare_bitmaps();
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               switch (run->exit_reason) {
+               case KVM_EXIT_X86_RDMSR:
+                       handle_rdmsr(run);
+                       break;
+               case KVM_EXIT_X86_WRMSR:
+                       handle_wrmsr(run);
+                       break;
+               case KVM_EXIT_IO:
+                       if (handle_ucall(vcpu))
+                               goto done;
+                       break;
+               }
+
+       }
+
+done:
+       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+       struct kvm_vm *vm = vcpu->vm;
+       int rc;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+       run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+       TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+                   "Expected ucall state to be UCALL_SYNC.");
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+       run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+       run_guest_then_process_ucall_done(vcpu);
+}
+
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
+({                                                                     \
+       int r = __vm_ioctl(vm, cmd, arg);                               \
+                                                                       \
+       if (flag & valid_mask)                                          \
+               TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r));            \
+       else                                                            \
+               TEST_ASSERT(r == -1 && errno == EINVAL,                 \
+                           "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+                           #cmd, flag, r, errno, strerror(errno));     \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+       struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+       int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+       int rc;
+       int i;
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
+       for (i = 0; i < nflags; i++) {
+               cap.args[0] = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+                          BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+       }
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+       u64 deny_bits = 0;
+       struct kvm_msr_filter filter = {
+               .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+               .ranges = {
+                       {
+                               .flags = KVM_MSR_FILTER_READ,
+                               .nmsrs = 1,
+                               .base = 0,
+                               .bitmap = (uint8_t *)&deny_bits,
+                       },
+               },
+       };
+       int nflags;
+       int rc;
+       int i;
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+       for (i = 0; i < nflags; i++) {
+               filter.flags = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+                          BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+       }
+
+       filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+       nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+       for (i = 0; i < nflags; i++) {
+               filter.ranges[0].flags = BIT_ULL(i);
+               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+                          BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+       }
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+       struct kvm_vm *vm = vcpu->vm;
+
+       /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+       run_user_space_msr_flag_test(vm);
+
+       /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+       run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
new file mode 100644 (file)
index 0000000..a81a247
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+       /* Exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+       control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+       vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+       /* Try to launch L2 with the memory-backed APIC-access address. */
+       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+       /* Try to resume L2 with the unbacked APIC-access address. */
+       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+       GUEST_ASSERT(!vmresume());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned long apic_access_addr = ~0ul;
+       vm_vaddr_t vmx_pages_gva;
+       unsigned long high_gpa;
+       struct vmx_pages *vmx;
+       bool done = false;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       prepare_virtualize_apic_accesses(vmx, vm);
+       vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+       while (!done) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               if (apic_access_addr == high_gpa) {
+                       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+                       TEST_ASSERT(run->internal.suberror ==
+                                   KVM_INTERNAL_ERROR_EMULATION,
+                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
+                                   run->internal.suberror);
+                       break;
+               }
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       apic_access_addr = uc.args[1];
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+               }
+       }
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
new file mode 100644 (file)
index 0000000..dad9883
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_close_while_nested
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Verify that nothing bad happens if a KVM user exits with open
+ * file descriptors while executing a nested guest.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+enum {
+       PORT_L0_EXIT = 0x2000,
+};
+
+static void l2_guest_code(void)
+{
+       /* Exit to L0 */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (PORT_L0_EXIT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (;;) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               if (run->io.port == PORT_L0_EXIT)
+                       break;
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
new file mode 100644 (file)
index 0000000..fa512d0
--- /dev/null
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX            1
+#define TEST_MEM_PAGES                 3
+
+/* L1 guest test virtual memory offset */
+#define GUEST_TEST_MEM                 0xc0000000
+
+/* L2 guest test virtual memory offset */
+#define NESTED_TEST_MEM1               0xc0001000
+#define NESTED_TEST_MEM2               0xc0002000
+
+static void l2_guest_code(u64 *a, u64 *b)
+{
+       READ_ONCE(*a);
+       WRITE_ONCE(*a, 1);
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       WRITE_ONCE(*b, 1);
+       GUEST_SYNC(true);
+       WRITE_ONCE(*b, 1);
+       GUEST_SYNC(true);
+       GUEST_SYNC(false);
+
+       /* Exit to L1 and never come back.  */
+       vmcall();
+}
+
+static void l2_guest_code_ept_enabled(void)
+{
+       l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+       /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+       l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
+void l1_guest_code(struct vmx_pages *vmx)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       void *l2_rip;
+
+       GUEST_ASSERT(vmx->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+       GUEST_ASSERT(load_vmcs(vmx));
+
+       if (vmx->eptp_gpa)
+               l2_rip = l2_guest_code_ept_enabled;
+       else
+               l2_rip = l2_guest_code_ept_disabled;
+
+       prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       GUEST_SYNC(false);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_SYNC(false);
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_DONE();
+}
+
+static void test_vmx_dirty_log(bool enable_ept)
+{
+       vm_vaddr_t vmx_pages_gva = 0;
+       struct vmx_pages *vmx;
+       unsigned long *bmap;
+       uint64_t *host_test_mem;
+
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       bool done = false;
+
+       pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       /* Add an extra memory slot for testing dirty logging */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   GUEST_TEST_MEM,
+                                   TEST_MEM_SLOT_INDEX,
+                                   TEST_MEM_PAGES,
+                                   KVM_MEM_LOG_DIRTY_PAGES);
+
+       /*
+        * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
+        * affects both L1 and L2.  However...
+        */
+       virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
+
+       /*
+        * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
+        * 0xc0000000.
+        *
+        * Note that prepare_eptp should be called only after L1's GPA map is
+        * done, meaning after the last call to virt_map.
+        *
+        * When EPT is disabled, the L2 guest code will still access the same L1
+        * GPAs as the EPT enabled case.
+        */
+       if (enable_ept) {
+               prepare_eptp(vmx, vm, 0);
+               nested_map_memslot(vmx, vm, 0);
+               nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+               nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+       }
+
+       bmap = bitmap_zalloc(TEST_MEM_PAGES);
+       host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
+
+       while (!done) {
+               memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       /*
+                        * The nested guest wrote at offset 0x1000 in the memslot, but the
+                        * dirty bitmap must be filled in according to L1 GPA, not L2.
+                        */
+                       kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+                       if (uc.args[1]) {
+                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
+                       } else {
+                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
+                       }
+
+                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
+                       break;
+               case UCALL_DONE:
+                       done = true;
+                       break;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       test_vmx_dirty_log(/*enable_ept=*/false);
+
+       if (kvm_cpu_has_ept())
+               test_vmx_dirty_log(/*enable_ept=*/true);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
new file mode 100644 (file)
index 0000000..3fd6ece
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+       asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+                   "Expected emulation failure, got %d",
+                   run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Always run twice to verify KVM handles the case where _KVM_ queues
+        * an exception with invalid state and then exits to userspace, i.e.
+        * that KVM doesn't explode if userspace ignores the initial error.
+        */
+       __run_vcpu_with_invalid_state(vcpu);
+       __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+       struct itimerval timer;
+
+       timer.it_value.tv_sec  = 0;
+       timer.it_value.tv_usec = 200;
+       timer.it_interval = timer.it_value;
+       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
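+/*
+ * Toggle TR.unusable to make the guest state invalid (or valid again) for
+ * the next KVM_RUN.
+ */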
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+       static struct kvm_sregs sregs;
+
+       if (!sregs.cr0)
+               vcpu_sregs_get(vcpu, &sregs);
+       sregs.tr.unusable = !!set;
+       vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+       set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+       set_or_clear_invalid_guest_state(vcpu, false);
+}
+
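+/* Stash the vCPU on the first call so the SIGALRM handler can retrieve it. */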
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+       static struct kvm_vcpu *vcpu = NULL;
+
+       if (__vcpu)
+               vcpu = __vcpu;
+       return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+       struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+       struct kvm_vcpu_events events;
+
+       TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+       vcpu_events_get(vcpu, &events);
+
+       /*
+        * If an exception is pending, attempt KVM_RUN with invalid guest state,
+        * otherwise rearm the timer and keep doing so until the timer fires
+        * between KVM queueing an exception and re-entering the guest.
+        */
+       if (events.exception.pending) {
+               set_invalid_guest_state(vcpu);
+               run_vcpu_with_invalid_state(vcpu);
+       } else {
+               set_timer();
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       get_set_sigalrm_vcpu(vcpu);
+
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+       /*
+        * Stuff invalid guest state by making TR unusable.  The next KVM_RUN
+        * should exit with an emulation error as KVM doesn't support
+        * emulating a guest with invalid state.
+        */
+       set_invalid_guest_state(vcpu);
+       run_vcpu_with_invalid_state(vcpu);
+
+       /*
+        * Verify KVM also handles the case where userspace gains control while
+        * an exception is pending and stuffs invalid state.  Run with valid
+        * guest state and a timer firing every 200us, and attempt to enter the
+        * guest with invalid state when the handler interrupts KVM with an
+        * exception pending.
+        */
+       clear_invalid_guest_state(vcpu);
+       TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+                   "Failed to register SIGALRM handler, errno = %d (%s)",
+                   errno, strerror(errno));
+
+       set_timer();
+       run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
new file mode 100644 (file)
index 0000000..a100ee5
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       /*
+        * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+        * arbitrary port.
+        */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /*
+        * L2 must be run without unrestricted guest, verify that the selftests
+        * library hasn't enabled it.  Because KVM selftests jump directly to
+        * 64-bit mode, unrestricted guest support isn't required.
+        */
+       GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+                    !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+       GUEST_ASSERT(!vmlaunch());
+
+       /* L2 should triple fault after main() stuffs invalid guest state. */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_sregs sregs;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       vcpu_run(vcpu);
+
+       run = vcpu->run;
+
+       /*
+        * The first exit to L0 userspace should be an I/O access from L2.
+        * Running L1 should launch L2 without triggering an exit to userspace.
+        */
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+                   "Expected IN from port %d from L2, got port %d",
+                   ARBITRARY_IO_PORT, run->io.port);
+
+       /*
+        * Stuff invalid guest state for L2 by making TR unusable.  The next
+        * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+        * emulating invalid guest state for L2.
+        */
+       memset(&sregs, 0, sizeof(sregs));
+       vcpu_sregs_get(vcpu, &sregs);
+       sregs.tr.unusable = 1;
+       vcpu_sregs_set(vcpu, &sregs);
+
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
new file mode 100644 (file)
index 0000000..90720b6
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
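+/*
+ * For each bit in @mask that is also set in the MSR's current value, verify
+ * that KVM allows userspace to clear the bit, then restore the original value.
+ */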
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+                               uint64_t mask)
+{
+       uint64_t val = vcpu_get_msr(vcpu, msr_index);
+       uint64_t bit;
+
+       mask &= val;
+
+       for_each_set_bit(bit, &mask, 64) {
+               vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+               vcpu_set_msr(vcpu, msr_index, val);
+       }
+}
+
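+/*
+ * For each bit in @mask that is clear in the MSR's current value, verify that
+ * KVM allows userspace to set the bit, then restore the original value.
+ */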
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+                               uint64_t mask)
+{
+       uint64_t val = vcpu_get_msr(vcpu, msr_index);
+       uint64_t bit;
+
+       mask = ~mask | val;
+
+       for_each_clear_bit(bit, &mask, 64) {
+               vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+               vcpu_set_msr(vcpu, msr_index, val);
+       }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+       vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+       vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+                           BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+                           BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+                           BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
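+/*
+ * Hide @feature from the guest's CPUID, verify that KVM still allows
+ * userspace to toggle @msr_bit (with the lock bit set) and restore the
+ * original value, then re-expose the feature if the host supports it.
+ */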
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+                                           uint64_t msr_bit,
+                                           struct kvm_x86_cpu_feature feature)
+{
+       uint64_t val;
+
+       vcpu_clear_cpuid_feature(vcpu, feature);
+
+       val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+       if (!kvm_cpu_has(feature))
+               return;
+
+       vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+       uint64_t supported_bits = FEAT_CTL_LOCKED |
+                                 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+                                 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+                                 FEAT_CTL_SGX_LC_ENABLED |
+                                 FEAT_CTL_SGX_ENABLED |
+                                 FEAT_CTL_LMCE_ENABLED;
+       int bit, r;
+
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+       for_each_clear_bit(bit, &supported_bits, 64) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+               TEST_ASSERT(r == 0,
+                           "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+       }
+}
+
+int main(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       /* No need to actually do KVM_RUN, thus no guest code. */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       vmx_save_restore_msrs_test(vcpu);
+       ia32_feature_control_msr_test(vcpu);
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
new file mode 100644 (file)
index 0000000..1759fa5
--- /dev/null
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
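+/* The VMX TSC multiplier is a fixed-point value with 48 fractional bits. */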
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+       uint64_t tolerance, thresh_low, thresh_high;
+
+       tolerance = expected / 100;
+       thresh_low = expected - tolerance;
+       thresh_high = expected + tolerance;
+
+       TEST_ASSERT(thresh_low < actual,
+               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+               " but it actually is %"PRIu64,
+               thresh_low, thresh_high, actual);
+       TEST_ASSERT(thresh_high > actual,
+               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+               " but it actually is %"PRIu64,
+               thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+       uint64_t tsc_start, tsc_end, tsc_freq;
+
+       /*
+        * Reading the TSC twice with about a second's difference should give
+        * us an approximation of the TSC frequency from the guest's
+        * perspective. Now, this won't be completely accurate, but it should
+        * be good enough for the purposes of this test.
+        */
+       tsc_start = rdmsr(MSR_IA32_TSC);
+       GUEST_SLEEP(1);
+       tsc_end = rdmsr(MSR_IA32_TSC);
+
+       tsc_freq = tsc_end - tsc_start;
+
+       GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+       check_tsc_freq(UCHECK_L2);
+
+       /* exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+
+       /* check that L1's frequency looks alright before launching L2 */
+       check_tsc_freq(UCHECK_L1);
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* prepare the VMCS for L2 execution */
+       prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* enable TSC offsetting and TSC scaling for L2 */
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+       control |= SECONDARY_EXEC_TSC_SCALING;
+       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+       vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+       vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+       vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+       /* launch L2 */
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       /* check that L1's frequency still looks good */
+       check_tsc_freq(UCHECK_L1);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       vm_vaddr_t vmx_pages_gva;
+
+       uint64_t tsc_start, tsc_end;
+       uint64_t tsc_khz;
+       uint64_t l1_scale_factor;
+       uint64_t l0_tsc_freq = 0;
+       uint64_t l1_tsc_freq = 0;
+       uint64_t l2_tsc_freq = 0;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+       /*
+        * We set L1's scale factor to be a random number from 2 to 10.
+        * Ideally we would do the same for L2's factor but that one is
+        * referenced by both main() and l1_guest_code() and using a global
+        * variable does not work.
+        */
+       srand(time(NULL));
+       l1_scale_factor = (rand() % 9) + 2;
+       printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+       printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+       tsc_start = rdtsc();
+       sleep(1);
+       tsc_end = rdtsc();
+
+       l0_tsc_freq = tsc_end - tsc_start;
+       printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+       TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+       /* scale down L1's TSC frequency */
+       vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               case UCALL_SYNC:
+                       switch (uc.args[0]) {
+                       case USLEEP:
+                               sleep(uc.args[1]);
+                               break;
+                       case UCHECK_L1:
+                               l1_tsc_freq = uc.args[1];
+                               printf("L1's TSC frequency is around: %"PRIu64
+                                      "\n", l1_tsc_freq);
+
+                               compare_tsc_freq(l1_tsc_freq,
+                                                l0_tsc_freq / l1_scale_factor);
+                               break;
+                       case UCHECK_L2:
+                               l2_tsc_freq = uc.args[1];
+                               printf("L2's TSC frequency is around: %"PRIu64
+                                      "\n", l2_tsc_freq);
+
+                               compare_tsc_freq(l2_tsc_freq,
+                                                l1_tsc_freq * L2_SCALE_FACTOR);
+                               break;
+                       }
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
new file mode 100644 (file)
index 0000000..a1f5ff4
--- /dev/null
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test the effect of various CPUID settings on the
+ * MSR_IA32_PERF_CAPABILITIES MSR, verify that what is written with
+ * KVM_SET_MSR is _not_ modified by the guest and can be retrieved
+ * with KVM_GET_MSR, and verify that invalid LBR formats are
+ * rejected.
+ */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+       struct {
+               u64     lbr_format:6;
+               u64     pebs_trap:1;
+               u64     pebs_arch_reg:1;
+               u64     pebs_format:4;
+               u64     smm_freeze:1;
+               u64     full_width_write:1;
+               u64     pebs_baseline:1;
+               u64     perf_metrics:1;
+               u64     pebs_output_pt_available:1;
+               u64     anythread_deprecated:1;
+       };
+       u64     capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+       .lbr_format = -1,
+       .pebs_trap  = 1,
+       .pebs_arch_reg = 1,
+       .pebs_format = -1,
+       .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+       .lbr_format = -1,
+       .pebs_format = -1,
+};
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+       __GUEST_ASSERT(vector == GP_VECTOR,
+                      "Expected #GP for value '0x%lx', got vector '0x%x'",
+                      val, vector);
+}
+
+static void guest_code(uint64_t current_val)
+{
+       int i;
+
+       guest_test_perf_capabilities_gp(current_val);
+       guest_test_perf_capabilities_gp(0);
+
+       for (i = 0; i < 64; i++)
+               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+       GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+       struct ucall uc;
+       int r, i;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+       vcpu_args_set(vcpu, 1, host_cap.capabilities);
+       vcpu_run(vcpu);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_ABORT:
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       case UCALL_DONE:
+               break;
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+                       host_cap.capabilities);
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+       r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+       TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+       for (i = 0; i < 64; i++) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                                 host_cap.capabilities ^ BIT_ULL(i));
+               TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
+                           host_cap.capabilities ^ BIT_ULL(i));
+       }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+       const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+       int bit;
+
+       for_each_set_bit(bit, &fungible_caps, 64) {
+               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                            host_cap.capabilities & ~BIT_ULL(bit));
+       }
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES.  Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+       const uint64_t reserved_caps = (~host_cap.capabilities |
+                                       immutable_caps.capabilities) &
+                                      ~format_caps.capabilities;
+       union perf_capabilities val = host_cap;
+       int r, bit;
+
+       for_each_set_bit(bit, &reserved_caps, 64) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+                                 host_cap.capabilities ^ BIT_ULL(bit));
+               TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+                           host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+                           BIT_ULL(bit), bit);
+       }
+
+       /*
+        * KVM only supports the host's native LBR format, as well as '0' (to
+        * disable LBR support).  Verify KVM rejects all other LBR formats.
+        */
+       for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+               if (val.lbr_format == host_cap.lbr_format)
+                       continue;
+
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+               TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+                           val.lbr_format, host_cap.lbr_format);
+       }
+
+       /* Ditto for the PEBS format. */
+       for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+               if (val.pebs_format == host_cap.pebs_format)
+                       continue;
+
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+               TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+                           val.pebs_format, host_cap.pebs_format);
+       }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support.  Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+       int r;
+
+       if (!host_cap.lbr_format)
+               return;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+       vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+       vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+       r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+       TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
+{
+       uint64_t val;
+       int i, r;
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+       TEST_ASSERT_EQ(val, host_cap.capabilities);
+
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
+
+       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+       TEST_ASSERT_EQ(val, 0);
+
+       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+
+       for (i = 0; i < 64; i++) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
+               TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
+                           i, BIT_ULL(i));
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+       TEST_ASSERT(host_cap.full_width_write,
+                   "Full-width writes should always be supported");
+
+       return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
new file mode 100644 (file)
index 0000000..00dd2ac
--- /dev/null
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure that VM-Enter after migration doesn't
+ * incorrectly restart the timer with the full timer
+ * value instead of the partially decayed timer value.
+ *
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
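+/*
+ * Value programmed into the VMX-preemption timer, and the number of timer
+ * ticks L2 busy-waits for before forcing save/restore, i.e. the timer should
+ * be partially decayed when the vCPU state is migrated.
+ */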
+#define PREEMPTION_TIMER_VALUE                 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1       80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
+void l2_guest_code(void)
+{
+       u64 vmx_pt_delta;
+
+       vmcall();
+       l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+       /*
+        * Wait until the 1st threshold has passed
+        */
+       do {
+               l2_vmx_pt_finish = rdtsc();
+               vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+                               vmx_pt_rate;
+       } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+       /*
+        * Force L2 through Save and Restore cycle
+        */
+       GUEST_SYNC(1);
+
+       l2_save_restore_done = 1;
+
+       /*
+        * Now wait for the preemption timer to fire and
+        * exit to L1
+        */
+       while ((l2_vmx_pt_finish = rdtsc()))
+               ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       u64 l1_vmx_pt_start;
+       u64 l1_vmx_pt_finish;
+       u64 l1_tsc_deadline, l2_tsc_deadline;
+
+       GUEST_ASSERT(vmx_pages->vmcs_gpa);
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /*
+        * Check for Preemption timer support
+        */
+       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+                       : MSR_IA32_VMX_PINBASED_CTLS);
+       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+                       : MSR_IA32_VMX_EXIT_CTLS);
+
+       if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+           !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+               return;
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+       /*
+        * Turn on PIN control and resume the guest
+        */
+       GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+                             vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+                             PIN_BASED_VMX_PREEMPTION_TIMER));
+
+       GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+                             PREEMPTION_TIMER_VALUE));
+
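+       /*
+        * Bits 4:0 of IA32_VMX_MISC enumerate the TSC bit at which the
+        * VMX-preemption timer ticks, i.e. the timer counts down once every
+        * 2^vmx_pt_rate TSC cycles.
+        */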
+       vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+       l2_save_restore_done = 0;
+
+       l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+       GUEST_ASSERT(!vmresume());
+
+       l1_vmx_pt_finish = rdtsc();
+
+       /*
+        * Ensure exit from L2 happens after L2 goes through
+        * save and restore
+        */
+       GUEST_ASSERT(l2_save_restore_done);
+
+       /*
+        * Ensure the exit from L2 is due to preemption timer expiry
+        */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+       l1_tsc_deadline = l1_vmx_pt_start +
+               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+       l2_tsc_deadline = l2_vmx_pt_start +
+               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+       /*
+        * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+        * tsc deadlines so that host can verify they are as expected
+        */
+       GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+               l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+       if (vmx_pages)
+               l1_guest_code(vmx_pages);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva = 0;
+
+       struct kvm_regs regs1, regs2;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       struct kvm_x86_state *state;
+       struct ucall uc;
+       int stage;
+
+       /*
+        * AMD currently does not implement any VMX features, so for now we
+        * just early out.
+        */
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       vcpu_regs_get(vcpu, &regs1);
+
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (stage = 1;; stage++) {
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               /* UCALL_SYNC is handled here.  */
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+               /*
+                * If this is stage 2, verify that the VMX preemption timer
+                * expiry is as expected.
+                * From L1's perspective, verify the preemption timer hasn't
+                * expired too early.
+                * From L2's perspective, verify the preemption timer hasn't
+                * expired too late.
+                */
+               if (stage == 2) {
+
+                       pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+                               stage, uc.args[2], uc.args[3]);
+
+                       pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+                               stage, uc.args[4], uc.args[5]);
+
+                       TEST_ASSERT(uc.args[2] >= uc.args[3],
+                               "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+                               stage, uc.args[2], uc.args[3]);
+
+                       TEST_ASSERT(uc.args[4] < uc.args[5],
+                               "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+                               stage, uc.args[4], uc.args[5]);
+               }
+
+               state = vcpu_save_state(vcpu);
+               memset(&regs1, 0, sizeof(regs1));
+               vcpu_regs_get(vcpu, &regs1);
+
+               kvm_vm_release(vm);
+
+               /* Restore state in a new VM.  */
+               vcpu = vm_recreate_with_one_vcpu(vm);
+               vcpu_load_state(vcpu, state);
+               kvm_x86_state_cleanup(state);
+
+               memset(&regs2, 0, sizeof(regs2));
+               vcpu_regs_get(vcpu, &regs2);
+               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+                           (ulong) regs2.rdi, (ulong) regs2.rsi);
+       }
+
+done:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
new file mode 100644 (file)
index 0000000..67a62a5
--- /dev/null
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_set_nested_state_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <errno.h>
+#include <linux/kvm.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/*
+ * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
+ * changes this should be updated.
+ */
+#define VMCS12_REVISION 0x11e57ed0
+
+bool have_evmcs;
+
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
+{
+       vcpu_nested_state_set(vcpu, state);
+}
+
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
+                                   struct kvm_nested_state *state,
+                                   int expected_errno)
+{
+       int rv;
+
+       rv = __vcpu_nested_state_set(vcpu, state);
+       TEST_ASSERT(rv == -1 && errno == expected_errno,
+               "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
+               strerror(expected_errno), expected_errno, rv, strerror(errno),
+               errno);
+}
+
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
+                                    struct kvm_nested_state *state)
+{
+       test_nested_state_expect_errno(vcpu, state, EINVAL);
+}
+
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
+                                    struct kvm_nested_state *state)
+{
+       test_nested_state_expect_errno(vcpu, state, EFAULT);
+}
+
+void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
+                               u32 vmcs12_revision)
+{
+       /* Set revision_id in vmcs12 to vmcs12_revision. */
+       memcpy(&state->data, &vmcs12_revision, sizeof(u32));
+}
+
+void set_default_state(struct kvm_nested_state *state)
+{
+       memset(state, 0, sizeof(*state));
+       state->flags = KVM_STATE_NESTED_RUN_PENDING |
+                      KVM_STATE_NESTED_GUEST_MODE;
+       state->format = 0;
+       state->size = sizeof(*state);
+}
+
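+/*
+ * Populate a minimal, plausible VMX nested state: format '0' (VMX), page
+ * aligned vmxon/vmcs12 addresses, no SMM flags, and the canonical vmcs12
+ * revision ID (plus the eVMCS flag when enlightened VMCS is in play).
+ */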
+void set_default_vmx_state(struct kvm_nested_state *state, int size)
+{
+       memset(state, 0, size);
+       if (have_evmcs)
+               state->flags = KVM_STATE_NESTED_EVMCS;
+       state->format = 0;
+       state->size = size;
+       state->hdr.vmx.vmxon_pa = 0x1000;
+       state->hdr.vmx.vmcs12_pa = 0x2000;
+       state->hdr.vmx.smm.flags = 0;
+       set_revision_id_for_vmcs12(state, VMCS12_REVISION);
+}
+
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
+{
+       /* Add a page for VMCS12. */
+       const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+       struct kvm_nested_state *state =
+               (struct kvm_nested_state *)malloc(state_sz);
+
+       /* The format must be set to 0. 0 for VMX, 1 for SVM. */
+       set_default_vmx_state(state, state_sz);
+       state->format = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * We cannot virtualize anything if the guest does not have VMX
+        * enabled.
+        */
+       set_default_vmx_state(state, state_sz);
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * We cannot virtualize anything if the guest does not have VMX
+        * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
+        * is set to -1ull, but the flags must be zero.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       test_nested_state_expect_einval(vcpu, state);
+
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       state->flags = KVM_STATE_NESTED_EVMCS;
+       test_nested_state_expect_einval(vcpu, state);
+
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+
+       /* Enable VMX in the guest CPUID. */
+       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+       /*
+        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+        * setting the nested state. When the eVMCS flag is not set, the
+        * expected return value is '0'.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       test_nested_state(vcpu, state);
+
+       /*
+        * When eVMCS is supported, the eVMCS flag can only be set if the
+        * enlightened VMCS capability has been enabled.
+        */
+       if (have_evmcs) {
+               state->flags = KVM_STATE_NESTED_EVMCS;
+               test_nested_state_expect_einval(vcpu, state);
+               vcpu_enable_evmcs(vcpu);
+               test_nested_state(vcpu, state);
+       }
+
+       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+       state->hdr.vmx.smm.flags = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* Invalid flags are rejected. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.flags = ~0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->flags = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* It is invalid to have vmxon_pa set to a non-page aligned address. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = 1;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
+        * KVM_STATE_NESTED_GUEST_MODE set together.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = KVM_STATE_NESTED_GUEST_MODE |
+                      KVM_STATE_NESTED_RUN_PENDING;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * It is invalid to have any of the SMM flags set besides:
+        *      KVM_STATE_NESTED_SMM_GUEST_MODE
+        *      KVM_STATE_NESTED_SMM_VMXON
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+                               KVM_STATE_NESTED_SMM_VMXON);
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* Outside SMM, SMM flags must be zero. */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * Size must be large enough to fit kvm_nested_state and vmcs12
+        * if VMCS12 physical address is set
+        */
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
+       test_nested_state(vcpu, state);
+
+       /*
+        * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+        * contents but L2 not running.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+
+       /* Invalid flags are rejected, even if no VMCS loaded. */
+       set_default_vmx_state(state, state_sz);
+       state->size = sizeof(*state);
+       state->flags = 0;
+       state->hdr.vmx.vmcs12_pa = -1;
+       state->hdr.vmx.flags = ~0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /* vmxon_pa cannot be the same address as vmcs_pa. */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = 0;
+       state->hdr.vmx.vmcs12_pa = 0;
+       test_nested_state_expect_einval(vcpu, state);
+
+       /*
+        * Test that if we leave nesting the state reflects that when we get
+        * it again.
+        */
+       set_default_vmx_state(state, state_sz);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       state->flags = 0;
+       test_nested_state(vcpu, state);
+       vcpu_nested_state_get(vcpu, state);
+       TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+                   "Size must be between %ld and %d.  The size returned was %d.",
+                   sizeof(*state), state_sz, state->size);
+       TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+       TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+       free(state);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+       struct kvm_nested_state state;
+       struct kvm_vcpu *vcpu;
+
+       have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+
+       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+       /*
+        * AMD currently does not implement set_nested_state, so for now we
+        * just early out.
+        */
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       /*
+        * First run tests with VMX disabled to check error handling.
+        */
+       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+
+       /* Passing a NULL kvm_nested_state causes a EFAULT. */
+       test_nested_state_expect_efault(vcpu, NULL);
+
+       /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
+       set_default_state(&state);
+       state.size = 0;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       /*
+        * Setting the flags 0xf fails the flags check.  The only flags that
+        * can be used are:
+        *     KVM_STATE_NESTED_GUEST_MODE
+        *     KVM_STATE_NESTED_RUN_PENDING
+        *     KVM_STATE_NESTED_EVMCS
+        */
+       set_default_state(&state);
+       state.flags = 0xf;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       /*
+        * If KVM_STATE_NESTED_RUN_PENDING is set then
+        * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
+        */
+       set_default_state(&state);
+       state.flags = KVM_STATE_NESTED_RUN_PENDING;
+       test_nested_state_expect_einval(vcpu, &state);
+
+       test_vmx_nested_state(vcpu);
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
new file mode 100644 (file)
index 0000000..2ceb5c7
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_tsc_adjust_test
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+       PORT_ABORT = 0x1000,
+       PORT_REPORT,
+       PORT_DONE,
+};
+
+enum {
+       VMXON_PAGE = 0,
+       VMCS_PAGE,
+       MSR_BITMAP_PAGE,
+
+       NUM_VMX_PAGES,
+};
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
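+/* Report IA32_TSC_ADJUST to the host and assert it doesn't exceed @max. */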
+static void check_ia32_tsc_adjust(int64_t max)
+{
+       int64_t adjust;
+
+       adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+       GUEST_SYNC(adjust);
+       GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+       uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+       wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+       /* Exit to L1 */
+       __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       uint32_t control;
+       uintptr_t save_cr3;
+
+       GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+       wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+       vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+       /* Jump into L2.  First, test failure to load guest CR3.  */
+       save_cr3 = vmreadz(GUEST_CR3);
+       vmwrite(GUEST_CR3, -1ull);
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+                    (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+       vmwrite(GUEST_CR3, save_cr3);
+
+       GUEST_ASSERT(!vmlaunch());
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+       GUEST_DONE();
+}
+
+static void report(int64_t val)
+{
+       pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+               val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_vcpu *vcpu;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+       vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       report(uc.args[1]);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
new file mode 100644 (file)
index 0000000..a76078a
--- /dev/null
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-NUMA machines, the
+ * test will exit with an error. The test is still useful on non-NUMA
+ * machines for testing IPIs.
+ */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR      0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+       uint32_t halter_apic_id;
+       volatile uint64_t hlt_count;
+       volatile uint64_t wake_count;
+       uint64_t ipis_sent;
+       uint64_t migrations_attempted;
+       uint64_t migrations_completed;
+       uint32_t icr;
+       uint32_t icr2;
+       uint32_t halter_tpr;
+       uint32_t halter_ppr;
+
+       /*
+        *  Record local version register as a cross-check that APIC access
+        *  worked. Value should match what KVM reports (APIC_VERSION in
+        *  arch/x86/kvm/lapic.c). If test is failing, check that values match
+        *  to determine whether APIC access exits are working.
+        */
+       uint32_t halter_lvr;
+};
+
+struct thread_params {
+       struct test_data_page *data;
+       struct kvm_vcpu *vcpu;
+       uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+       uint64_t base = GET_APIC_BASE(msr);
+
+       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+       verify_apic_base_addr();
+       xapic_enable();
+
+       data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+       data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+       /*
+        * Loop forever HLTing and recording halts & wakes. Disable interrupts
+        * each time around to minimize window between signaling the pending
+        * halt to the sender vCPU and executing the halt. No need to disable on
+        * first run as this vCPU executes first and the host waits for it to
+        * signal going into first halt before starting the sender vCPU. Record
+        * TPR and PPR for diagnostic purposes in case the test fails.
+        */
+       for (;;) {
+               data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+               data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+               data->hlt_count++;
+               asm volatile("sti; hlt; cli");
+               data->wake_count++;
+       }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+       ipis_rcvd++;
+       xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+       uint64_t last_wake_count;
+       uint64_t last_hlt_count;
+       uint64_t last_ipis_rcvd_count;
+       uint32_t icr_val;
+       uint32_t icr2_val;
+       uint64_t tsc_start;
+
+       verify_apic_base_addr();
+       xapic_enable();
+
+       /*
+        * Init interrupt command register for sending IPIs
+        *
+        * Delivery mode=fixed, per SDM:
+        *   "Delivers the interrupt specified in the vector field to the target
+        *    processor."
+        *
+        * Destination mode=physical i.e. specify target by its local APIC
+        * ID. This vCPU assumes that the halter vCPU has already started and
+        * set data->halter_apic_id.
+        */
+       icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+       icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+       data->icr = icr_val;
+       data->icr2 = icr2_val;
+
+       last_wake_count = data->wake_count;
+       last_hlt_count = data->hlt_count;
+       last_ipis_rcvd_count = ipis_rcvd;
+       for (;;) {
+               /*
+                * Send IPI to halter vCPU.
+                * First IPI can be sent unconditionally because halter vCPU
+                * starts earlier.
+                */
+               xapic_write_reg(APIC_ICR2, icr2_val);
+               xapic_write_reg(APIC_ICR, icr_val);
+               data->ipis_sent++;
+
+               /*
+                * Wait up to ~1 sec for halter to indicate that it has:
+                * 1. Received the IPI
+                * 2. Woken up from the halt
+                * 3. Gone back into halt
+                * Current CPUs typically run at 2.x GHz which is ~2
+                * billion ticks per second.
+                */
+               tsc_start = rdtsc();
+               while (rdtsc() - tsc_start < 2000000000) {
+                       if ((ipis_rcvd != last_ipis_rcvd_count) &&
+                           (data->wake_count != last_wake_count) &&
+                           (data->hlt_count != last_hlt_count))
+                               break;
+               }
+
+               GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+                            (data->wake_count != last_wake_count) &&
+                            (data->hlt_count != last_hlt_count));
+
+               last_wake_count = data->wake_count;
+               last_hlt_count = data->hlt_count;
+               last_ipis_rcvd_count = ipis_rcvd;
+       }
+}
+
+static void *vcpu_thread(void *arg)
+{
+       struct thread_params *params = (struct thread_params *)arg;
+       struct kvm_vcpu *vcpu = params->vcpu;
+       struct ucall uc;
+       int old;
+       int r;
+
+       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+       TEST_ASSERT(r == 0,
+                   "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+                   vcpu->id, r);
+
+       fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+       vcpu_run(vcpu);
+
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+       if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+               TEST_ASSERT(false,
+                           "vCPU %u exited with error: %s.\n"
+                           "Sending vCPU sent %lu IPIs to halting vCPU\n"
+                           "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+                           "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+                           "Migrations attempted: %lu\n"
+                           "Migrations completed: %lu",
+                           vcpu->id, (const char *)uc.args[0],
+                           params->data->ipis_sent, params->data->hlt_count,
+                           params->data->wake_count,
+                           *params->pipis_rcvd, params->data->halter_tpr,
+                           params->data->halter_ppr, params->data->halter_lvr,
+                           params->data->migrations_attempted,
+                           params->data->migrations_completed);
+       }
+
+       return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+       void *retval;
+       int r;
+
+       r = pthread_cancel(thread);
+       TEST_ASSERT(r == 0,
+                   "pthread_cancel on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+
+       r = pthread_join(thread, &retval);
+       TEST_ASSERT(r == 0,
+                   "pthread_join on vcpu_id=%d failed with errno=%d",
+                   vcpu->id, r);
+       TEST_ASSERT(retval == PTHREAD_CANCELED,
+                   "expected retval=%p, got %p", PTHREAD_CANCELED,
+                   retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+                  uint64_t *pipis_rcvd)
+{
+       long pages_not_moved;
+       unsigned long nodemask = 0;
+       unsigned long nodemasks[sizeof(nodemask) * 8];
+       int nodes = 0;
+       time_t start_time, last_update, now;
+       time_t interval_secs = 1;
+       int i, r;
+       int from, to;
+       unsigned long bit;
+       uint64_t hlt_count;
+       uint64_t wake_count;
+       uint64_t ipis_sent;
+
+       fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+               delay_usecs);
+
+       /* Get set of first 64 numa nodes available */
+       r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+                         0, MPOL_F_MEMS_ALLOWED);
+       TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+       fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+               "(each set bit indicates the node is present): %#lx\n",
+               sizeof(nodemask) * 8, nodemask);
+
+       /*
+        * Init array of masks containing a single bit each, one for each
+        * available node.  migrate_pages() called below requires specifying
+        * nodes as bit masks.
+        */
+       for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+               if (nodemask & bit) {
+                       nodemasks[nodes] = nodemask & bit;
+                       nodes++;
+               }
+       }
+
+       TEST_ASSERT(nodes > 1,
+                   "Did not find at least 2 numa nodes. Can't do migration");
+
+       fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+       from = 0;
+       to = 1;
+       start_time = time(NULL);
+       last_update = start_time;
+
+       ipis_sent = data->ipis_sent;
+       hlt_count = data->hlt_count;
+       wake_count = data->wake_count;
+
+       while ((int)(time(NULL) - start_time) < run_secs) {
+               data->migrations_attempted++;
+
+               /*
+                * migrate_pages with PID=0 will migrate all pages of this
+                * process between the nodes specified as bitmasks. The page
+                * backing the APIC access address belongs to this process
+                * because it is allocated by KVM in the context of the
+                * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
+                * test may break or give a false positive signal.
+                */
+               pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+                                               &nodemasks[from],
+                                               &nodemasks[to]);
+               if (pages_not_moved < 0)
+                       fprintf(stderr,
+                               "migrate_pages failed, errno=%d\n", errno);
+               else if (pages_not_moved > 0)
+                       fprintf(stderr,
+                               "migrate_pages could not move %ld pages\n",
+                               pages_not_moved);
+               else
+                       data->migrations_completed++;
+
+               from = to;
+               to++;
+               if (to == nodes)
+                       to = 0;
+
+               now = time(NULL);
+               if (((now - start_time) % interval_secs == 0) &&
+                   (now != last_update)) {
+                       last_update = now;
+                       fprintf(stderr,
+                               "%lu seconds: Migrations attempted=%lu completed=%lu, "
+                               "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+                               now - start_time, data->migrations_attempted,
+                               data->migrations_completed,
+                               data->ipis_sent, *pipis_rcvd,
+                               data->hlt_count, data->wake_count);
+
+                       TEST_ASSERT(ipis_sent != data->ipis_sent &&
+                                   hlt_count != data->hlt_count &&
+                                   wake_count != data->wake_count,
+                                   "IPI, HLT and wake counts have not increased "
+                                   "in the last %lu seconds. "
+                                   "HLTer is likely hung.", interval_secs);
+
+                       ipis_sent = data->ipis_sent;
+                       hlt_count = data->hlt_count;
+                       wake_count = data->wake_count;
+               }
+               usleep(delay_usecs);
+       }
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+                     bool *migrate, int *delay_usecs)
+{
+       for (;;) {
+               int opt = getopt(argc, argv, "s:d:m");
+
+               if (opt == -1)
+                       break;
+               switch (opt) {
+               case 's':
+                       *run_secs = parse_size(optarg);
+                       break;
+               case 'm':
+                       *migrate = true;
+                       break;
+               case 'd':
+                       *delay_usecs = parse_size(optarg);
+                       break;
+               default:
+                       TEST_ASSERT(false,
+                                   "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+                                   "-m adds calls to migrate_pages while vCPUs are running."
+                                   " Default is no migrations.\n"
+                                   "-d <delay microseconds> - delay between migrate_pages() calls."
+                                   " Default is %d microseconds.",
+                                   DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       int r;
+       int wait_secs;
+       const int max_halter_wait = 10;
+       int run_secs = 0;
+       int delay_usecs = 0;
+       struct test_data_page *data;
+       vm_vaddr_t test_data_page_vaddr;
+       bool migrate = false;
+       pthread_t threads[2];
+       struct thread_params params[2];
+       struct kvm_vm *vm;
+       uint64_t *pipis_rcvd;
+
+       get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+       if (run_secs <= 0)
+               run_secs = DEFAULT_RUN_SECS;
+       if (delay_usecs <= 0)
+               delay_usecs = DEFAULT_DELAY_USECS;
+
+       vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+       params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+       test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+       data = addr_gva2hva(vm, test_data_page_vaddr);
+       memset(data, 0, sizeof(*data));
+       params[0].data = data;
+       params[1].data = data;
+
+       vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+       vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
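+       /*
+        * Resolve the guest's ipis_rcvd counter to a host pointer so that the
+        * host-side threads can observe IPI delivery directly.
+        */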
+       pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+       params[0].pipis_rcvd = pipis_rcvd;
+       params[1].pipis_rcvd = pipis_rcvd;
+
+       /* Start halter vCPU thread and wait for it to execute first HLT. */
+       r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+       TEST_ASSERT(r == 0,
+                   "pthread_create halter failed errno=%d", errno);
+       fprintf(stderr, "Halter vCPU thread started\n");
+
+       wait_secs = 0;
+       while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+               sleep(1);
+               wait_secs++;
+       }
+
+       TEST_ASSERT(data->hlt_count,
+                   "Halter vCPU did not execute first HLT within %d seconds",
+                   max_halter_wait);
+
+       fprintf(stderr,
+               "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+               data->halter_apic_id, wait_secs);
+
+       r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+       TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+       fprintf(stderr,
+               "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+               run_secs);
+
+       if (!migrate)
+               sleep(run_secs);
+       else
+               do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+       /*
+        * Cancel threads and wait for them to stop.
+        */
+       cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+       cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+       fprintf(stderr,
+               "Test successful after running for %d seconds.\n"
+               "Sending vCPU sent %lu IPIs to halting vCPU\n"
+               "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+               "Halter APIC ID=%#x\n"
+               "Sender ICR value=%#x ICR2 value=%#x\n"
+               "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+               "Migrations attempted: %lu\n"
+               "Migrations completed: %lu\n",
+               run_secs, data->ipis_sent,
+               data->hlt_count, data->wake_count, *pipis_rcvd,
+               data->halter_apic_id,
+               data->icr, data->icr2,
+               data->halter_tpr, data->halter_ppr, data->halter_lvr,
+               data->migrations_attempted, data->migrations_completed);
+
+       kvm_vm_free(vm);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
new file mode 100644 (file)
index 0000000..88bcca1
--- /dev/null
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+       struct kvm_vcpu *vcpu;
+       bool is_x2apic;
+       bool has_xavic_errata;
+};
+
+static void xapic_guest_code(void)
+{
+       asm volatile("cli");
+
+       xapic_enable();
+
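+       /* The host passes the ICR value to test via the IRR; see ____test_icr(). */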
+       while (1) {
+               uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+                              (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+               xapic_write_reg(APIC_ICR2, val >> 32);
+               xapic_write_reg(APIC_ICR, val);
+               GUEST_SYNC(val);
+       }
+}
+
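+/*
+ * ICR bits the test treats as reserved in x2APIC mode; the guest expects a
+ * write that sets any of them to fault.
+ */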
+#define X2APIC_RSVD_BITS_MASK  (GENMASK_ULL(31, 20) | \
+                               GENMASK_ULL(17, 16) | \
+                               GENMASK_ULL(13, 13))
+
+static void x2apic_guest_code(void)
+{
+       asm volatile("cli");
+
+       x2apic_enable();
+
+       do {
+               uint64_t val = x2apic_read_reg(APIC_IRR) |
+                              x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+               if (val & X2APIC_RSVD_BITS_MASK) {
+                       x2apic_write_reg_fault(APIC_ICR, val);
+               } else {
+                       x2apic_write_reg(APIC_ICR, val);
+                       GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
+               }
+               GUEST_SYNC(val);
+       } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+       struct kvm_vcpu *vcpu = x->vcpu;
+       struct kvm_lapic_state xapic;
+       struct ucall uc;
+       uint64_t icr;
+
+       /*
+        * Tell the guest what ICR value to write.  Use the IRR to pass info,
+        * all bits are valid and should not be modified by KVM (ignoring the
+        * fact that vectors 0-15 are technically illegal).
+        */
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+       *((u32 *)&xapic.regs[APIC_IRR]) = val;
+       *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+       vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+       vcpu_run(vcpu);
+       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+       TEST_ASSERT_EQ(uc.args[1], val);
+
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+       icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+             (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+       if (!x->is_x2apic) {
+               if (!x->has_xavic_errata)
+                       val &= (-1u | (0xffull << (32 + 24)));
+       } else if (val & X2APIC_RSVD_BITS_MASK) {
+               return;
+       }
+
+       if (x->has_xavic_errata)
+               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+       else
+               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+       /*
+        * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
+        * it as _must_ be zero.  Intel simply ignores the bit.  Don't test
+        * the BUSY bit for x2APIC, as there is no single correct behavior.
+        */
+       if (!x->is_x2apic)
+               ____test_icr(x, val | APIC_ICR_BUSY);
+
+       ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct xapic_vcpu *x)
+{
+       struct kvm_vcpu *vcpu = x->vcpu;
+       uint64_t icr, i, j;
+
+       icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+       for (i = 0; i <= 0xff; i++)
+               __test_icr(x, icr | i);
+
+       icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+       for (i = 0; i <= 0xff; i++)
+               __test_icr(x, icr | i);
+
+       /*
+        * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
+        * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
+        */
+       icr = APIC_INT_ASSERT | 0xff;
+       for (i = 0; i < 0xff; i++) {
+               if (i == vcpu->id)
+                       continue;
+               for (j = 0; j < 8; j++)
+                       __test_icr(x, i << (32 + 24) | icr | (j << 8));
+       }
+
+       /* And again with a shorthand destination for all types of IPIs. */
+       icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+       for (i = 0; i < 8; i++)
+               __test_icr(x, icr | (i << 8));
+
+       /* And a few garbage values, just to make sure it's an IRQ (blocked). */
+       __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+       __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+       __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+       uint32_t apic_id, expected;
+       struct kvm_lapic_state xapic;
+
+       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
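+       /* The xAPIC ID lives in APIC_ID bits 31:24; x2APIC uses the full 32-bit ID. */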
+       expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+       apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+       TEST_ASSERT(apic_id == expected,
+                   "APIC_ID not set back to %s format; wanted = %x, got = %x",
+                   (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+                   expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE.  Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions from DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, since
+ * attempting to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+       const uint32_t NR_VCPUS = 3;
+       struct kvm_vcpu *vcpus[NR_VCPUS];
+       uint64_t apic_base;
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+       vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+       for (i = 0; i < NR_VCPUS; i++) {
+               apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+               TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+                           "APIC not in ENABLED state at vCPU RESET");
+               TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+                           "APIC not in xAPIC mode at vCPU RESET");
+
+               __test_apic_id(vcpus[i], apic_base);
+               __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+               __test_apic_id(vcpus[i], apic_base);
+       }
+
+       kvm_vm_free(vm);
+}
+
+static void test_x2apic_id(void)
+{
+       struct kvm_lapic_state lapic = {};
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+
+       /*
+        * Try stuffing a modified x2APIC ID; KVM should ignore the value and
+        * always return the vCPU's default/readonly x2APIC ID.
+        */
+       for (i = 0; i <= 0xff; i++) {
+               *(u32 *)(lapic.regs + APIC_ID) = i << 24;
+               *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
+               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+
+               vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+               TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
+                           "x2APIC ID should be fully readonly");
+       }
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       struct xapic_vcpu x = {
+               .vcpu = NULL,
+               .is_x2apic = true,
+       };
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+       test_icr(&x);
+       kvm_vm_free(vm);
+
+       /*
+        * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+        * the guest in order to test AVIC.  KVM disallows changing CPUID after
+        * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+        */
+       vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+       x.is_x2apic = false;
+
+       /*
+        * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
+        * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
+        * drop writes, AMD does not).  Account for the errata when checking
+        * that KVM reads back what was written.
+        */
+       x.has_xavic_errata = host_cpu_is_amd &&
+                            get_kvm_amd_param_bool("avic");
+
+       vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+       test_icr(&x);
+       kvm_vm_free(vm);
+
+       test_apic_id();
+       test_x2apic_id();
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
new file mode 100644 (file)
index 0000000..c8a5c5e
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
+do {                                                                                   \
+       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
+                                                                                       \
+       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
+                      __supported == ((xfeatures) | (dependencies)),                   \
+                      "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx",  \
+                      __supported, (xfeatures), (dependencies));                       \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0.  Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently.  E.g. a CPU can technically enumerate support for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures)         \
+do {                                                                   \
+       uint64_t __supported = (supported_xcr0) & (xfeatures);          \
+                                                                       \
+       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
+                      "supported = 0x%lx, xfeatures = 0x%llx",         \
+                      __supported, (xfeatures));                       \
+} while (0)
+
+static void guest_code(void)
+{
+       uint64_t initial_xcr0;
+       uint64_t supported_xcr0;
+       int i, vector;
+
+       set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+       initial_xcr0 = xgetbv(0);
+       supported_xcr0 = this_cpu_supported_xcr0();
+
+       GUEST_ASSERT(initial_xcr0 == supported_xcr0);
+
+       /* Check AVX */
+       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+                                    XFEATURE_MASK_YMM,
+                                    XFEATURE_MASK_SSE);
+
+       /* Check MPX */
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+       /* Check AVX-512 */
+       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+                                    XFEATURE_MASK_AVX512,
+                                    XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_AVX512);
+
+       /* Check AMX */
+       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+                                   XFEATURE_MASK_XTILE);
+
+       vector = xsetbv_safe(0, XFEATURE_MASK_FP);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(FP), got vector '0x%x'",
+                      vector);
+
+       vector = xsetbv_safe(0, supported_xcr0);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(0x%lx), got vector '0x%x'",
+                      supported_xcr0, vector);
+
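+       /* Setting any unsupported XCR0 bit, one at a time, should #GP. */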
+       for (i = 0; i < 64; i++) {
+               if (supported_xcr0 & BIT_ULL(i))
+                       continue;
+
+               vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
+                              BIT_ULL(i), supported_xcr0, vector);
+       }
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Unexpected exit reason: %u (%s),",
+                           run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
new file mode 100644 (file)
index 0000000..a59b3c7
--- /dev/null
@@ -0,0 +1,1161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA      0xc0000000ULL
+#define SHINFO_REGION_GPA      0xc0000000ULL
+#define SHINFO_REGION_SLOT     10
+
+#define DUMMY_REGION_GPA       (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT      11
+
+#define DUMMY_REGION_GPA_2     (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2    12
+
+#define SHINFO_ADDR    (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR   (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR        (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define EVTCHN_VECTOR  0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+       TEST_INJECT_VECTOR = 0,
+       TEST_RUNSTATE_runnable,
+       TEST_RUNSTATE_blocked,
+       TEST_RUNSTATE_offline,
+       TEST_RUNSTATE_ADJUST,
+       TEST_RUNSTATE_DATA,
+       TEST_STEAL_TIME,
+       TEST_EVTCHN_MASKED,
+       TEST_EVTCHN_UNMASKED,
+       TEST_EVTCHN_SLOWPATH,
+       TEST_EVTCHN_SEND_IOCTL,
+       TEST_EVTCHN_HCALL,
+       TEST_EVTCHN_HCALL_SLOWPATH,
+       TEST_EVTCHN_HCALL_EVENTFD,
+       TEST_TIMER_SETUP,
+       TEST_TIMER_WAIT,
+       TEST_TIMER_RESTORE,
+       TEST_POLL_READY,
+       TEST_POLL_TIMEOUT,
+       TEST_POLL_MASKED,
+       TEST_POLL_WAKE,
+       SET_VCPU_INFO,
+       TEST_TIMER_PAST,
+       TEST_LOCKING_SEND_RACE,
+       TEST_LOCKING_POLL_RACE,
+       TEST_LOCKING_POLL_TIMEOUT,
+       TEST_DONE,
+
+       TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR      0x40000000
+
+#define MIN_STEAL_TIME         50000
+
+#define SHINFO_RACE_TIMEOUT    2       /* seconds */
+
+#define __HYPERVISOR_set_timer_op      15
+#define __HYPERVISOR_sched_op          29
+#define __HYPERVISOR_event_channel_op  32
+
+#define SCHEDOP_poll                   3
+
+#define EVTCHNOP_send                  4
+
+#define EVTCHNSTAT_interdomain         2
+
+struct evtchn_send {
+       u32 port;
+};
+
+struct sched_poll {
+       u32 *ports;
+       unsigned int nr_ports;
+       u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+       u32   version;
+       u32   pad0;
+       u64   tsc_timestamp;
+       u64   system_time;
+       u32   tsc_to_system_mul;
+       s8    tsc_shift;
+       u8    flags;
+       u8    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+       u32   version;
+       u32   sec;
+       u32   nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+       uint32_t state;
+       uint64_t state_entry_time;
+       uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+       uint32_t state;
+       uint64_t state_entry_time;
+       uint64_t time[5];
+} __attribute__((__packed__));
+
+struct arch_vcpu_info {
+       unsigned long cr2;
+       unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+       uint8_t evtchn_upcall_pending;
+       uint8_t evtchn_upcall_mask;
+       unsigned long evtchn_pending_sel;
+       struct arch_vcpu_info arch;
+       struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+       struct vcpu_info vcpu_info[32];
+       unsigned long evtchn_pending[64];
+       unsigned long evtchn_mask[64];
+       struct pvclock_wall_clock wc;
+       uint32_t wc_sec_hi;
+       /* arch_shared_info here */
+};
+
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+
+static const char *runstate_names[] = {
+       "running",
+       "runnable",
+       "blocked",
+       "offline"
+};
+
+struct {
+       struct kvm_irq_routing info;
+       struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+       struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+
+       vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+       vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
+       guest_saw_irq = true;
+
+       GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
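+       /* "rep nop" is PAUSE; spin until the upcall handler sets guest_saw_irq. */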
+       while (!guest_saw_irq)
+               __asm__ __volatile__ ("rep nop" : : : "memory");
+       guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+       int i;
+
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       /* Trigger an interrupt injection */
+       GUEST_SYNC(TEST_INJECT_VECTOR);
+
+       guest_wait_for_irq();
+
+       /* Test having the host set runstates manually */
+       GUEST_SYNC(TEST_RUNSTATE_runnable);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(TEST_RUNSTATE_blocked);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(TEST_RUNSTATE_offline);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       /* Test runstate time adjust */
+       GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+       /* Test runstate time set */
+       GUEST_SYNC(TEST_RUNSTATE_DATA);
+       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+       /* sched_yield() should result in some 'runnable' time */
+       GUEST_SYNC(TEST_STEAL_TIME);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+       /* Attempt to deliver a *masked* interrupt */
+       GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+       /* Wait until we see the bit set */
+       struct shared_info *si = (void *)SHINFO_VADDR;
+       while (!si->evtchn_pending[0])
+               __asm__ __volatile__ ("rep nop" : : : "memory");
+
+       /* Now deliver an *unmasked* interrupt */
+       GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+       guest_wait_for_irq();
+
+       /* Change memslots and deliver an interrupt */
+       GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+       guest_wait_for_irq();
+
+       /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+       GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+       /* Our turn.  Deliver an event channel (to ourselves) with the
+        * EVTCHNOP_send hypercall. */
+       struct evtchn_send s = { .port = 127 };
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+       /*
+        * Same again, but this time the host has messed with memslots so it
+        * should take the slow path in kvm_xen_set_evtchn().
+        */
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+       /* Deliver "outbound" event channel to an eventfd which
+        * happens to be one of our own irqfds. */
+       s.port = 197;
+       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_SETUP);
+
+       /* Set a timer 100ms in the future. */
+       xen_hypercall(__HYPERVISOR_set_timer_op,
+                     rs->state_entry_time + 100000000, NULL);
+
+       GUEST_SYNC(TEST_TIMER_WAIT);
+
+       /* Now wait for the timer */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_RESTORE);
+
+       /* The host has 'restored' the timer. Just wait for it. */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_POLL_READY);
+
+       /* Poll for an event channel port which is already set */
+       u32 ports[1] = { EVTCHN_TIMER };
+       struct sched_poll p = {
+               .ports = ports,
+               .nr_ports = 1,
+               .timeout = 0,
+       };
+
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+       /* Poll for an unset port and wait for the timeout. */
+       p.timeout = 100000000;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_MASKED);
+
+       /* A timer will wake the masked port we're waiting on, while we poll */
+       p.timeout = 0;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       GUEST_SYNC(TEST_POLL_WAKE);
+
+       /* Set the vcpu_info to point at exactly the place it already is to
+        * make sure the attribute is functional. */
+       GUEST_SYNC(SET_VCPU_INFO);
+
+       /* A timer will wake an *unmasked* port, which should wake us with an
+        * actual interrupt, while we're polling on a different port. */
+       ports[0]++;
+       p.timeout = 0;
+       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_TIMER_PAST);
+
+       /* Timer should have fired already */
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+       /* Racing host ioctls */
+
+       guest_wait_for_irq();
+
+       GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+       /* Racing vmcall against host ioctl */
+
+       ports[0] = 0;
+
+       p = (struct sched_poll) {
+               .ports = ports,
+               .nr_ports = 1,
+               .timeout = 0
+       };
+
+wait_for_timer:
+       /*
+        * Poll for a timer wake event while the worker thread is mucking with
+        * the shared info.  KVM XEN drops timer IRQs if the shared info is
+        * invalid when the timer expires.  Arbitrarily poll 100 times before
+        * giving up and asking the VMM to re-arm the timer.  100 polls should
+        * consume enough time to beat on KVM without taking too long if the
+        * timer IRQ is dropped due to an invalid event channel.
+        */
+       for (i = 0; i < 100 && !guest_saw_irq; i++)
+               __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+       /*
+        * Re-send the timer IRQ if it was (likely) dropped due to the timer
+        * expiring while the event channel was invalid.
+        */
+       if (!guest_saw_irq) {
+               GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+               goto wait_for_timer;
+       }
+       guest_saw_irq = false;
+
+       GUEST_SYNC(TEST_DONE);
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
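+/*
+ * SIGALRM handler, armed via alarm(1) as a watchdog while the host waits for
+ * the guest to make progress; if it fires, IRQ delivery timed out.
+ */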
+static void handle_alrm(int sig)
+{
+       if (vinfo)
+               printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+       vcpu_dump(stdout, vcpu, 0);
+       TEST_FAIL("IRQ delivery timed out");
+}
+
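+/*
+ * Worker thread that repeatedly activates and deactivates the shared_info
+ * cache so that the vCPU races against an in-flux mapping.
+ */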
+static void *juggle_shinfo_state(void *arg)
+{
+       struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+       struct kvm_xen_hvm_attr cache_activate_gfn = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+       };
+
+       struct kvm_xen_hvm_attr cache_activate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+               .u.shared_info.hva = (unsigned long)shinfo
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+               .u.shared_info.hva = 0
+       };
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+       for (;;) {
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+               pthread_testcancel();
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+                       pthread_testcancel();
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+               }
+       }
+
+       return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_xen_hvm_attr evt_reset;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       bool verbose;
+       int ret;
+
+       verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+                              !strncmp(argv[1], "--verbose", 10));
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+       bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+       bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+       bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* Map a region for the shared_info page */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+       int zero_fd = open("/dev/zero", O_RDONLY);
+       TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+       struct kvm_xen_hvm_config hvmc = {
+               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+               .msr = XEN_HYPERCALL_MSR,
+       };
+
+       /* Let the kernel know that we *will* use it for sending all
+        * event channels, which lets it intercept SCHEDOP_poll */
+       if (do_evtchn_tests)
+               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+       struct kvm_xen_hvm_attr lm = {
+               .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+               .u.long_mode = 1,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+       if (do_runstate_flag) {
+               struct kvm_xen_hvm_attr ruf = {
+                       .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+                       .u.runstate_update_flag = 1,
+               };
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+               ruf.u.runstate_update_flag = 0;
+               vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+               TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+                           "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+       }
+
+       struct kvm_xen_hvm_attr ha = {};
+
+       if (has_shinfo_hva) {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+               ha.u.shared_info.hva = (unsigned long)shinfo;
+       } else {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+       }
+
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+       /*
+        * Test what happens when the HVA of the shinfo page is remapped after
+        * the kernel has a reference to it. But make sure we copy the clock
+        * info over since that's only set at setup time, and we test it later.
+        */
+       struct pvclock_wall_clock wc_copy = shinfo->wc;
+       void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+       TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+       shinfo->wc = wc_copy;
+
+       struct kvm_xen_vcpu_attr vi = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+               .u.gpa = VCPU_INFO_ADDR,
+       };
+       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+       struct kvm_xen_vcpu_attr pvclock = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+               .u.gpa = PVTIME_ADDR,
+       };
+       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+       struct kvm_xen_hvm_attr vec = {
+               .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+               .u.vector = EVTCHN_VECTOR,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+       vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+       if (do_runstate_tests) {
+               struct kvm_xen_vcpu_attr st = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                       .u.gpa = RUNSTATE_ADDR,
+               };
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+       }
+
+       int irq_fd[2] = { -1, -1 };
+
+       if (do_eventfd_tests) {
+               irq_fd[0] = eventfd(0, 0);
+               irq_fd[1] = eventfd(0, 0);
+
+               /* Unexpected, but not a KVM failure */
+               if (irq_fd[0] == -1 || irq_fd[1] == -1)
+                       do_evtchn_tests = do_eventfd_tests = false;
+       }
+
+       if (do_eventfd_tests) {
+               irq_routes.info.nr = 2;
+
+               irq_routes.entries[0].gsi = 32;
+               irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+               irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+               irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+               irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+               irq_routes.entries[1].gsi = 33;
+               irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+               irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+               irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+               irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+               vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+               struct kvm_irqfd ifd = { };
+
+               ifd.fd = irq_fd[0];
+               ifd.gsi = 32;
+               vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+               ifd.fd = irq_fd[1];
+               ifd.gsi = 33;
+               vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+               struct sigaction sa = { };
+               sa.sa_handler = handle_alrm;
+               sigaction(SIGALRM, &sa, NULL);
+       }
+
+       struct kvm_xen_vcpu_attr tmr = {
+               .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+               .u.timer.port = EVTCHN_TIMER,
+               .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+               .u.timer.expires_ns = 0
+       };
+
+       if (do_evtchn_tests) {
+               struct kvm_xen_hvm_attr inj = {
+                       .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+                       .u.evtchn.send_port = 127,
+                       .u.evtchn.type = EVTCHNSTAT_interdomain,
+                       .u.evtchn.flags = 0,
+                       .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+                       .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+                       .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+               };
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               /* Test migration to a different vCPU */
+               inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+               inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               inj.u.evtchn.send_port = 197;
+               inj.u.evtchn.deliver.eventfd.port = 0;
+               inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+               inj.u.evtchn.flags = 0;
+               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+       }
+       vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+       vinfo->evtchn_upcall_pending = 0;
+
+       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+       rs->state = 0x5a;
+
+       bool evtchn_irq_expected = false;
+
+       for (;;) {
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC: {
+                       struct kvm_xen_vcpu_attr rst;
+                       long rundelay;
+
+                       if (do_runstate_tests)
+                               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                           rs->time[1] + rs->time[2] + rs->time[3],
+                                           "runstate times don't add up");
+
+                       switch (uc.args[1]) {
+                       case TEST_INJECT_VECTOR:
+                               if (verbose)
+                                       printf("Delivering evtchn upcall\n");
+                               evtchn_irq_expected = true;
+                               vinfo->evtchn_upcall_pending = 1;
+                               break;
+
+                       case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+                               TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+                               if (!do_runstate_tests)
+                                       goto done;
+                               if (verbose)
+                                       printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+                               rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+                                       TEST_RUNSTATE_runnable;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_RUNSTATE_ADJUST:
+                               if (verbose)
+                                       printf("Testing RUNSTATE_ADJUST\n");
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = (uint64_t)-1;
+                               rst.u.runstate.time_blocked =
+                                       0x5a - rs->time[RUNSTATE_blocked];
+                               rst.u.runstate.time_offline =
+                                       0x6b6b - rs->time[RUNSTATE_offline];
+                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+                                       rst.u.runstate.time_offline;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_RUNSTATE_DATA:
+                               if (verbose)
+                                       printf("Testing RUNSTATE_DATA\n");
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = RUNSTATE_running;
+                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+                               rst.u.runstate.time_blocked = 0x6b6b;
+                               rst.u.runstate.time_offline = 0x5a;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case TEST_STEAL_TIME:
+                               if (verbose)
+                                       printf("Testing steal time\n");
+                               /* Yield until scheduler delay exceeds target */
+                               rundelay = get_run_delay() + MIN_STEAL_TIME;
+                               do {
+                                       sched_yield();
+                               } while (get_run_delay() < rundelay);
+                               break;
+
+                       case TEST_EVTCHN_MASKED:
+                               if (!do_eventfd_tests)
+                                       goto done;
+                               if (verbose)
+                                       printf("Testing masked event channel\n");
+                               shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+                               eventfd_write(irq_fd[0], 1UL);
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_UNMASKED:
+                               if (verbose)
+                                       printf("Testing unmasked event channel\n");
+                               /* Unmask that, but deliver the other one */
+                               shinfo->evtchn_pending[0] = 0;
+                               shinfo->evtchn_mask[0] = 0;
+                               eventfd_write(irq_fd[1], 1UL);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_SLOWPATH:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+                               if (verbose)
+                                       printf("Testing event channel after memslot change\n");
+                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                                           DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+                               eventfd_write(irq_fd[0], 1UL);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_SEND_IOCTL:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               if (!do_evtchn_tests)
+                                       goto done;
+
+                               shinfo->evtchn_pending[0] = 0;
+                               if (verbose)
+                                       printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+                               struct kvm_irq_routing_xen_evtchn e;
+                               e.port = EVTCHN_TEST2;
+                               e.vcpu = vcpu->id;
+                               e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+                               vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL_SLOWPATH:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                                           DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_EVTCHN_HCALL_EVENTFD:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest EVTCHNOP_send to eventfd\n");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_TIMER_SETUP:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[1] = 0;
+
+                               if (verbose)
+                                       printf("Testing guest oneshot timer\n");
+                               break;
+
+                       case TEST_TIMER_WAIT:
+                               memset(&tmr, 0, sizeof(tmr));
+                               tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+                                           "Timer port not returned");
+                               TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+                                           "Timer priority not returned");
+                               TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+                                           "Timer expiry not returned");
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_TIMER_RESTORE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               shinfo->evtchn_pending[0] = 0;
+
+                               if (verbose)
+                                       printf("Testing restored oneshot timer\n");
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               evtchn_irq_expected = true;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_READY:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll with already pending event\n");
+                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_TIMEOUT:
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll timeout\n");
+                               shinfo->evtchn_pending[0] = 0;
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_MASKED:
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll wake on masked event\n");
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               alarm(1);
+                               break;
+
+                       case TEST_POLL_WAKE:
+                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+                               if (verbose)
+                                       printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+                               /* Read it back and check the pending time is reported correctly */
+                               tmr.u.timer.expires_ns = 0;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+                                           "Timer not reported pending");
+                               alarm(1);
+                               break;
+
+                       case SET_VCPU_INFO:
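+                               /*
+                                * Switch the vcpu_info to be tracked by host
+                                * virtual address, if KVM supports the HVA
+                                * based attributes (has_shinfo_hva).
+                                */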
+                               if (has_shinfo_hva) {
+                                       struct kvm_xen_vcpu_attr vih = {
+                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+                                               .u.hva = (unsigned long)vinfo
+                                       };
+                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+                               }
+                               break;
+
+                       case TEST_TIMER_PAST:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               /* Read timer and check it is no longer pending */
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+                               TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+                               shinfo->evtchn_pending[0] = 0;
+                               if (verbose)
+                                       printf("Testing timer in the past\n");
+
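+                               /*
+                                * A timer armed with an expiry in the past
+                                * should fire immediately.
+                                */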
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               alarm(1);
+                               break;
+
+                       case TEST_LOCKING_SEND_RACE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+                               alarm(0);
+
+                               if (verbose)
+                                       printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+                               ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+                               TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+                               struct kvm_irq_routing_xen_evtchn uxe = {
+                                       .port = 1,
+                                       .vcpu = vcpu->id,
+                                       .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+                               };
+
+                               evtchn_irq_expected = true;
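+                               /*
+                                * Hammer KVM_XEN_HVM_EVTCHN_SEND for the race
+                                * window while the juggle_shinfo_state() thread
+                                * churns the shared info state in the background.
+                                */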
+                               for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+                                       __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+                               break;
+
+                       case TEST_LOCKING_POLL_RACE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               if (verbose)
+                                       printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+                               shinfo->evtchn_pending[0] = 1;
+
+                               evtchn_irq_expected = true;
+                               tmr.u.timer.expires_ns = rs->state_entry_time +
+                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               break;
+
+                       case TEST_LOCKING_POLL_TIMEOUT:
+                               /*
+                                * Optional and possibly repeated sync point.
+                                * Injecting the timer IRQ may fail if the
+                                * shinfo is invalid when the timer expires.
+                                * If the timer has expired but the IRQ hasn't
+                                * been delivered, rearm the timer and retry.
+                                */
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+                               /* Resume the guest if the timer is still pending. */
+                               if (tmr.u.timer.expires_ns)
+                                       break;
+
+                               /* All done if the IRQ was delivered. */
+                               if (!evtchn_irq_expected)
+                                       break;
+
+                               tmr.u.timer.expires_ns = rs->state_entry_time +
+                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
+                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+                               break;
+                       case TEST_DONE:
+                               TEST_ASSERT(!evtchn_irq_expected,
+                                           "Expected event channel IRQ but it didn't happen");
+
+                               ret = pthread_cancel(thread);
+                               TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+                               ret = pthread_join(thread, 0);
+                               TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+                               goto done;
+
+                       case TEST_GUEST_SAW_IRQ:
+                               TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+                               evtchn_irq_expected = false;
+                               break;
+                       }
+                       break;
+               }
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+
+ done:
+       evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+       evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+       alarm(0);
+
+       /*
+        * Just a *really* basic check that things are being put in the
+        * right place. The actual calculations are much the same for
+        * Xen as they are for the KVM variants, so no need to check.
+        */
+       struct pvclock_wall_clock *wc;
+       struct pvclock_vcpu_time_info *ti, *ti2;
+       struct kvm_clock_data kcdata;
+       long long delta;
+
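+       /*
+        * Sanity check the pvclock structures the test registered earlier:
+        * the Xen wallclock, the time info embedded in the vcpu_info, and
+        * the second time info at PVTIME_ADDR.
+        */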
+       wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+       ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+       ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+       if (verbose) {
+               printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+               printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+                      ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+                      ti->tsc_shift, ti->flags);
+               printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+                      ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+                      ti2->tsc_shift, ti2->flags);
+       }
+
+       TEST_ASSERT(wc->version && !(wc->version & 1),
+                   "Bad wallclock version %x", wc->version);
+
+       vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+       if (kcdata.flags & KVM_CLOCK_REALTIME) {
+               if (verbose) {
+                       printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+                              kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+                       printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+                              kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+               }
+
+               delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+               /*
+                * KVM_GET_CLOCK gives CLOCK_REALTIME, which jumps on leap second
+                * updates, but unfortunately KVM doesn't currently offer a CLOCK_TAI
+                * alternative. Accept a 1s delta, as testing clock accuracy is not
+                * the goal here; the test just needs to check that the value in
+                * shinfo is somewhat sane.
+                */
+               TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+                           "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%09lld",
+                           wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+                           (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+       } else {
+               pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+       }
+
+       TEST_ASSERT(ti->version && !(ti->version & 1),
+                   "Bad time_info version %x", ti->version);
+       TEST_ASSERT(ti2->version && !(ti2->version & 1),
+                   "Bad time_info version %x", ti2->version);
+
+       if (do_runstate_tests) {
+               /*
+                * Fetch the runstate and check sanity. Strictly speaking, in the
+                * general case we might not expect the numbers to be identical,
+                * but in this case we know we aren't running the vCPU any more.
+                */
+               struct kvm_xen_vcpu_attr rst = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+               };
+               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+               if (verbose) {
+                       printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+                              rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+                              rs->state, rs->state_entry_time);
+                       for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+                               printf("State %s: %" PRIu64 " ns\n",
+                                      runstate_names[i], rs->time[i]);
+                       }
+               }
+
+               /*
+                * Exercise runstate info at all points across the page boundary, in
+                * 32-bit and 64-bit mode. In particular, test the case where it is
+                * configured in 32-bit mode and then switched to 64-bit mode while
+                * active, which takes it onto the second page.
+                */
+               unsigned long runstate_addr;
+               struct compat_vcpu_runstate_info *crs;
+               for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+                    runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
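+                       /*
+                        * View the same memory as both the 64-bit and compat
+                        * (32-bit) runstate layouts, and poison it so the
+                        * "Structure overrun" checks below catch any write
+                        * past the end of the expected layout.
+                        */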
+                       rs = addr_gpa2hva(vm, runstate_addr);
+                       crs = (void *)rs;
+
+                       memset(rs, 0xa5, sizeof(*rs));
+
+                       /* Set to compatibility mode */
+                       lm.u.long_mode = 0;
+                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+                       /* Set runstate to new address (kernel will write it) */
+                       struct kvm_xen_vcpu_attr st = {
+                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                               .u.gpa = runstate_addr,
+                       };
+                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+                       if (verbose)
+                               printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+                       TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+                       TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+                                   "State entry time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                                   "Running time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                                   "Runnable time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                                   "Blocked time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                                   "Offline time mismatch");
+                       TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+                                   "Structure overrun");
+                       TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+                                   crs->time[1] + crs->time[2] + crs->time[3],
+                                   "runstate times don't add up");
+
+
+                       /* Now switch to 64-bit mode */
+                       lm.u.long_mode = 1;
+                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+                       memset(rs, 0xa5, sizeof(*rs));
+
+                       /* Don't change the address, just trigger a write */
+                       struct kvm_xen_vcpu_attr adj = {
+                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+                               .u.runstate.state = (uint64_t)-1
+                       };
+                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+                       if (verbose)
+                               printf("64-bit runstate at %08lx\n", runstate_addr);
+
+                       TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+                       TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+                                   "State entry time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                                   "Running time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                                   "Runnable time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                                   "Blocked time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                                   "Offline time mismatch");
+                       TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+                                   "Structure overrun");
+
+                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                   rs->time[1] + rs->time[2] + rs->time[3],
+                                   "runstate times don't add up");
+               }
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
new file mode 100644 (file)
index 0000000..2585087
--- /dev/null
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define HCALL_REGION_GPA       0xc0000000ULL
+#define HCALL_REGION_SLOT      10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR      0x40000200
+#define HV_GUEST_OS_ID_MSR     0x40000000
+#define HV_HYPERCALL_MSR       0x40000001
+
+#define HVCALL_SIGNAL_EVENT            0x005d
+#define HV_STATUS_INVALID_ALIGNMENT    4
+
+static void guest_code(void)
+{
+       unsigned long rax = INPUTVALUE;
+       unsigned long rdi = ARGVALUE(1);
+       unsigned long rsi = ARGVALUE(2);
+       unsigned long rdx = ARGVALUE(3);
+       unsigned long rcx;
+       register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+       register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+       register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+       /* First a direct invocation of 'vmcall' */
+       __asm__ __volatile__("vmcall" :
+                            "=a"(rax) :
+                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+                            "r"(r10), "r"(r8), "r"(r9));
+       GUEST_ASSERT(rax == RETVALUE);
+
+       /* Fill in the Xen hypercall page */
+       __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+                            "a" (HCALL_REGION_GPA & 0xffffffff),
+                            "d" (HCALL_REGION_GPA >> 32));
+
+       /* Set Hyper-V Guest OS ID */
+       __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+                            "a" (0x5a), "d" (0));
+
+       /* Hyper-V hypercall page */
+       u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+       __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+                            "a" (msrval & 0xffffffff),
+                            "d" (msrval >> 32));
+
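+       /*
+        * Each hypercall's stub in the Xen hypercall page is 32 bytes, so
+        * hypercall INPUTVALUE lives at HCALL_REGION_GPA + INPUTVALUE * 32.
+        */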
+       /* Invoke a Xen hypercall */
+       __asm__ __volatile__("call *%1" : "=a"(rax) :
+                            "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+                            "r"(r10), "r"(r8), "r"(r9));
+       GUEST_ASSERT(rax == RETVALUE);
+
+       /* Invoke a Hyper-V hypercall */
+       rax = 0;
+       rcx = HVCALL_SIGNAL_EVENT;      /* code */
+       rdx = 0x5a5a5a5a;               /* ingpa (badly aligned) */
+       __asm__ __volatile__("call *%1" : "=a"(rax) :
+                            "r"(HCALL_REGION_GPA + PAGE_SIZE),
+                            "a"(rax), "c"(rcx), "d"(rdx),
+                            "r"(r8));
+       GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int xen_caps;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       vcpu_set_hv_cpuid(vcpu);
+
+       struct kvm_xen_hvm_config hvmc = {
+               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+               .msr = XEN_HYPERCALL_MSR,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+       /* Map a region for the hypercall pages */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+       virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+       for (;;) {
+               volatile struct kvm_run *run = vcpu->run;
+               struct ucall uc;
+
+               vcpu_run(vcpu);
+
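+               /*
+                * Intercepted Xen hypercalls exit to userspace; verify the
+                * marshalled registers and complete the call by filling in
+                * the result.
+                */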
+               if (run->exit_reason == KVM_EXIT_XEN) {
+                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+                       run->xen.u.hcall.result = RETVALUE;
+                       continue;
+               }
+
+               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       goto done;
+               default:
+                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+               }
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
new file mode 100644 (file)
index 0000000..f331a4e
--- /dev/null
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, Google LLC.
+ *
+ * Tests for the IA32_XSS MSR.
+ */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MSR_BITS      64
+
+int main(int argc, char *argv[])
+{
+       bool xss_in_msr_list;
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint64_t xss_val;
+       int i, r;
+
+       /* Create VM */
+       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
+
+       xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
+       TEST_ASSERT(xss_val == 0,
+                   "MSR_IA32_XSS should be initialized to zero");
+
+       vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
+
+       /*
+        * At present, KVM only supports a guest IA32_XSS value of 0. Verify
+        * that trying to set the guest IA32_XSS to an unsupported value fails.
+        * Also, in the future, when setting a non-zero value succeeds, check
+        * that IA32_XSS is in the list of MSRs to save/restore.
+        */
+       xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
+       for (i = 0; i < MSR_BITS; ++i) {
+               r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+               /*
+                * Setting a list of MSRs returns the entry that "faulted", or
+                * the last entry +1 if all MSRs were successfully written.
+                */
+               TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+               TEST_ASSERT(r != 1 || xss_in_msr_list,
+                           "IA32_XSS was able to be set, but was not in save/restore list");
+       }
+
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
deleted file mode 100644 (file)
index f4ce5a1..0000000
+++ /dev/null
@@ -1,315 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * amx tests
- *
- * Copyright (C) 2021, Intel, Inc.
- *
- * Tests for amx #NM exception and save/restore.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/syscall.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#ifndef __x86_64__
-# error This test is 64-bit only
-#endif
-
-#define NUM_TILES                      8
-#define TILE_SIZE                      1024
-#define XSAVE_SIZE                     ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
-
-/* Tile configuration associated: */
-#define PALETTE_TABLE_INDEX            1
-#define MAX_TILES                      16
-#define RESERVED_BYTES                 14
-
-#define XSAVE_HDR_OFFSET               512
-
-struct tile_config {
-       u8  palette_id;
-       u8  start_row;
-       u8  reserved[RESERVED_BYTES];
-       u16 colsb[MAX_TILES];
-       u8  rows[MAX_TILES];
-};
-
-struct tile_data {
-       u8 data[NUM_TILES * TILE_SIZE];
-};
-
-struct xtile_info {
-       u16 bytes_per_tile;
-       u16 bytes_per_row;
-       u16 max_names;
-       u16 max_rows;
-       u32 xsave_offset;
-       u32 xsave_size;
-};
-
-static struct xtile_info xtile;
-
-static inline void __ldtilecfg(void *cfg)
-{
-       asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
-                    : : "a"(cfg));
-}
-
-static inline void __tileloadd(void *tile)
-{
-       asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
-                    : : "a"(tile), "d"(0));
-}
-
-static inline void __tilerelease(void)
-{
-       asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
-}
-
-static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
-{
-       uint32_t rfbm_lo = rfbm;
-       uint32_t rfbm_hi = rfbm >> 32;
-
-       asm volatile("xsavec (%%rdi)"
-                    : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
-                    : "memory");
-}
-
-static void check_xtile_info(void)
-{
-       GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
-
-       xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
-       GUEST_ASSERT(xtile.xsave_offset == 2816);
-       xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
-       GUEST_ASSERT(xtile.xsave_size == 8192);
-       GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
-                    PALETTE_TABLE_INDEX);
-
-       GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
-       xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
-       GUEST_ASSERT(xtile.max_names == 8);
-       xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
-       GUEST_ASSERT(xtile.bytes_per_tile == 1024);
-       xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
-       GUEST_ASSERT(xtile.bytes_per_row == 64);
-       xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
-       GUEST_ASSERT(xtile.max_rows == 16);
-}
-
-static void set_tilecfg(struct tile_config *cfg)
-{
-       int i;
-
-       /* Only palette id 1 */
-       cfg->palette_id = 1;
-       for (i = 0; i < xtile.max_names; i++) {
-               cfg->colsb[i] = xtile.bytes_per_row;
-               cfg->rows[i] = xtile.max_rows;
-       }
-}
-
-static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
-                                                   struct tile_data *tiledata,
-                                                   struct xstate *xstate)
-{
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
-                    this_cpu_has(X86_FEATURE_OSXSAVE));
-       check_xtile_info();
-       GUEST_SYNC(1);
-
-       /* xfd=0, enable amx */
-       wrmsr(MSR_IA32_XFD, 0);
-       GUEST_SYNC(2);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
-       set_tilecfg(amx_cfg);
-       __ldtilecfg(amx_cfg);
-       GUEST_SYNC(3);
-       /* Check save/restore when trap to userspace */
-       __tileloadd(tiledata);
-       GUEST_SYNC(4);
-       __tilerelease();
-       GUEST_SYNC(5);
-       /*
-        * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
-        * the xcomp_bv.
-        */
-       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
-       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
-       GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
-
-       /* xfd=0x40000, disable amx tiledata */
-       wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
-
-       /*
-        * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
-        * remains the same even when amx tiledata is disabled by IA32_XFD.
-        */
-       xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
-       __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
-       GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
-
-       GUEST_SYNC(6);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       set_tilecfg(amx_cfg);
-       __ldtilecfg(amx_cfg);
-       /* Trigger #NM exception */
-       __tileloadd(tiledata);
-       GUEST_SYNC(10);
-
-       GUEST_DONE();
-}
-
-void guest_nm_handler(struct ex_regs *regs)
-{
-       /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
-       GUEST_SYNC(7);
-       GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_SYNC(8);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
-       GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-       /* Clear xfd_err */
-       wrmsr(MSR_IA32_XFD_ERR, 0);
-       /* xfd=0, enable amx */
-       wrmsr(MSR_IA32_XFD, 0);
-       GUEST_SYNC(9);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_regs regs1, regs2;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       int xsave_restore_size;
-       vm_vaddr_t amx_cfg, tiledata, xstate;
-       struct ucall uc;
-       u32 amx_offset;
-       int ret;
-
-       /*
-        * Note, all off-by-default features must be enabled before anything
-        * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
-        */
-       vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
-                   "KVM should enumerate max XSAVE size when XSAVE is supported");
-       xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       /* Register #NM handler */
-       vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
-       /* amx cfg for guest_code */
-       amx_cfg = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
-
-       /* amx tiledata for guest_code */
-       tiledata = vm_vaddr_alloc_pages(vm, 2);
-       memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
-
-       /* XSAVE state for guest_code */
-       xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
-       memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
-       vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
-
-       for (;;) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       switch (uc.args[1]) {
-                       case 1:
-                       case 2:
-                       case 3:
-                       case 5:
-                       case 6:
-                       case 7:
-                       case 8:
-                               fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
-                               break;
-                       case 4:
-                       case 10:
-                               fprintf(stderr,
-                               "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
-
-                               /*
-                                * Compacted mode: the AMX offset is the XSAVE
-                                * area size minus the 8K of AMX tile data.
-                                */
-                               amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
-                               state = vcpu_save_state(vcpu);
-                               void *amx_start = (void *)state->xsave + amx_offset;
-                               void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
-                               /* Only check TMM0 register, 1 tile */
-                               ret = memcmp(amx_start, tiles_data, TILE_SIZE);
-                               TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
-                               kvm_x86_state_cleanup(state);
-                               break;
-                       case 9:
-                               fprintf(stderr,
-                               "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
-                               break;
-                       }
-                       break;
-               case UCALL_DONE:
-                       fprintf(stderr, "UCALL_DONE\n");
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
deleted file mode 100644 (file)
index f8916bb..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2024 Intel Corporation
- *
- * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
- * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS.  Start the APIC timer by
- * programming TMICT (timer initial count) to the largest value possible (so
- * that the timer will not expire during the test).  Then, after an arbitrary
- * amount of time has elapsed, verify TMCCT (timer current count) is within 5%
- * of the expected value based on the time elapsed, the APIC bus frequency, and
- * the programmed TDCR (timer divide configuration register).
- */
-
-#include "apic.h"
-#include "test_util.h"
-
-/*
- * Possible TDCR values with matching divide count. Used to modify APIC
- * timer frequency.
- */
-static const struct {
-       const uint32_t tdcr;
-       const uint32_t divide_count;
-} tdcrs[] = {
-       {0x0, 2},
-       {0x1, 4},
-       {0x2, 8},
-       {0x3, 16},
-       {0x8, 32},
-       {0x9, 64},
-       {0xa, 128},
-       {0xb, 1},
-};
-
-static bool is_x2apic;
-
-static void apic_enable(void)
-{
-       if (is_x2apic)
-               x2apic_enable();
-       else
-               xapic_enable();
-}
-
-static uint32_t apic_read_reg(unsigned int reg)
-{
-       return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
-}
-
-static void apic_write_reg(unsigned int reg, uint32_t val)
-{
-       if (is_x2apic)
-               x2apic_write_reg(reg, val);
-       else
-               xapic_write_reg(reg, val);
-}
-
-static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
-{
-       uint64_t tsc_hz = guest_tsc_khz * 1000;
-       const uint32_t tmict = ~0u;
-       uint64_t tsc0, tsc1, freq;
-       uint32_t tmcct;
-       int i;
-
-       apic_enable();
-
-       /*
-        * Setup one-shot timer.  The vector does not matter because the
-        * interrupt should not fire.
-        */
-       apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
-
-       for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
-               apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
-               apic_write_reg(APIC_TMICT, tmict);
-
-               tsc0 = rdtsc();
-               udelay(delay_ms * 1000);
-               tmcct = apic_read_reg(APIC_TMCCT);
-               tsc1 = rdtsc();
-
-               /*
-                * Stop the timer _after_ reading the current, final count, as
-                * writing the initial counter also modifies the current count.
-                */
-               apic_write_reg(APIC_TMICT, 0);
-
-               freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
-               /* Check if measured frequency is within 5% of configured frequency. */
-               __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
-                              "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
-                              freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
-                              apic_hz, tdcrs[i].divide_count, tsc_hz);
-       }
-
-       GUEST_DONE();
-}
-
-static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
-{
-       bool done = false;
-       struct ucall uc;
-
-       while (!done) {
-               vcpu_run(vcpu);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-                       break;
-               }
-       }
-}
-
-static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
-                                   bool x2apic)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int ret;
-
-       is_x2apic = x2apic;
-
-       vm = vm_create(1);
-
-       sync_global_to_guest(vm, is_x2apic);
-
-       vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
-                     NSEC_PER_SEC / apic_hz);
-
-       vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
-       vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
-
-       ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
-                             NSEC_PER_SEC / apic_hz);
-       TEST_ASSERT(ret < 0 && errno == EINVAL,
-                   "Setting of APIC bus frequency after vCPU is created should fail.");
-
-       if (!is_x2apic)
-               virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       test_apic_bus_clock(vcpu);
-       kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
-       puts("");
-       printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
-       printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       /*
-        * Arbitrarily default to 25MHz for the APIC bus frequency, which is
-        * different enough from the default 1GHz to be interesting.
-        */
-       uint64_t apic_hz = 25 * 1000 * 1000;
-       uint64_t delay_ms = 100;
-       int opt;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
-
-       while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
-               switch (opt) {
-               case 'f':
-                       apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
-                       break;
-               case 'd':
-                       delay_ms = atoi_positive("Delay in milliseconds", optarg);
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       exit(KSFT_SKIP);
-               }
-       }
-
-       run_apic_bus_clock_test(apic_hz, delay_ms, false);
-       run_apic_bus_clock_test(apic_hz, delay_ms, true);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
deleted file mode 100644 (file)
index 7b3fda6..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat Inc.
- *
- * Generic tests for KVM CPUID set/get ioctls
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct cpuid_mask {
-       union {
-               struct {
-                       u32 eax;
-                       u32 ebx;
-                       u32 ecx;
-                       u32 edx;
-               };
-               u32 regs[4];
-       };
-};
-
-static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
-{
-       int i;
-       u32 eax, ebx, ecx, edx;
-
-       for (i = 0; i < guest_cpuid->nent; i++) {
-               __cpuid(guest_cpuid->entries[i].function,
-                       guest_cpuid->entries[i].index,
-                       &eax, &ebx, &ecx, &edx);
-
-               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
-               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
-               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
-               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
-       }
-
-}
-
-static void guest_main(struct kvm_cpuid2 *guest_cpuid)
-{
-       GUEST_SYNC(1);
-
-       test_guest_cpuids(guest_cpuid);
-
-       GUEST_SYNC(2);
-
-       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
-
-       GUEST_DONE();
-}
-
-static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry)
-{
-       struct cpuid_mask mask;
-
-       memset(&mask, 0xff, sizeof(mask));
-
-       switch (entry->function) {
-       case 0x1:
-               mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit);
-               break;
-       case 0x7:
-               mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit);
-               break;
-       case 0xd:
-               /*
-                * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current
-                * XCR0 and IA32_XSS MSR values.
-                */
-               if (entry->index < 2)
-                       mask.ebx = 0;
-               break;
-       }
-       return mask;
-}
-
-static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
-                          const struct kvm_cpuid2 *cpuid2)
-{
-       const struct kvm_cpuid_entry2 *e1, *e2;
-       int i;
-
-       TEST_ASSERT(cpuid1->nent == cpuid2->nent,
-                   "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
-
-       for (i = 0; i < cpuid1->nent; i++) {
-               struct cpuid_mask mask;
-
-               e1 = &cpuid1->entries[i];
-               e2 = &cpuid2->entries[i];
-
-               TEST_ASSERT(e1->function == e2->function &&
-                           e1->index == e2->index && e1->flags == e2->flags,
-                           "CPUID entries[%d] mismatch: 0x%x.%d.%x vs. 0x%x.%d.%x",
-                           i, e1->function, e1->index, e1->flags,
-                           e2->function, e2->index, e2->flags);
-
-               /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */
-               mask = get_const_cpuid_mask(e1);
-
-               TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) &&
-                           (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) &&
-                           (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) &&
-                           (e1->edx & mask.edx) == (e2->edx & mask.edx),
-                           "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
-                           e1->function, e1->index,
-                           e1->eax & mask.eax, e1->ebx & mask.ebx,
-                           e1->ecx & mask.ecx, e1->edx & mask.edx,
-                           e2->eax & mask.eax, e2->ebx & mask.ebx,
-                           e2->ecx & mask.ecx, e2->edx & mask.edx);
-       }
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage + 1,
-                           "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage + 1, (ulong)uc.args[1]);
-               return;
-       case UCALL_DONE:
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_ASSERT(false, "Unexpected exit: %s",
-                           exit_reason_str(vcpu->run->exit_reason));
-       }
-}
-
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
-{
-       int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
-       vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
-       struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
-
-       memcpy(guest_cpuids, cpuid, size);
-
-       *p_gva = gva;
-       return guest_cpuids;
-}
-
-static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *ent;
-       int rc;
-       u32 eax, ebx, x;
-
-       /* Setting unmodified CPUID is allowed */
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
-
-       /* Changing CPU features is forbidden */
-       ent = vcpu_get_cpuid_entry(vcpu, 0x7);
-       ebx = ent->ebx;
-       ent->ebx--;
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(rc, "Changing CPU features should fail");
-       ent->ebx = ebx;
-
-       /* Changing MAXPHYADDR is forbidden */
-       ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
-       eax = ent->eax;
-       x = eax & 0xff;
-       ent->eax = (eax & ~0xffu) | (x - 1);
-       rc = __vcpu_set_cpuid(vcpu);
-       TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
-       ent->eax = eax;
-}
-
-static void test_get_cpuid2(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
-       int i, r;
-
-       vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
-       TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
-                   "KVM didn't update nent on success, wanted %u, got %u",
-                   vcpu->cpuid->nent, cpuid->nent);
-
-       for (i = 0; i < vcpu->cpuid->nent; i++) {
-               cpuid->nent = i;
-               r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
-               TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
-               TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
-       }
-       free(cpuid);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t cpuid_gva;
-       struct kvm_vm *vm;
-       int stage;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
-
-       vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
-
-       vcpu_args_set(vcpu, 1, cpuid_gva);
-
-       for (stage = 0; stage < 3; stage++)
-               run_vcpu(vcpu, stage);
-
-       set_cpuid_after_run(vcpu);
-
-       test_get_cpuid2(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
deleted file mode 100644 (file)
index 28cc664..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CR4 and CPUID sync test
- *
- * Copyright 2018, Red Hat, Inc. and/or its affiliates.
- *
- * Author:
- *   Wei Huang <wei@redhat.com>
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define MAGIC_HYPERCALL_PORT   0x80
-
-static void guest_code(void)
-{
-       u32 regs[4] = {
-               [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function,
-               [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index,
-       };
-
-       /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */
-       GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
-
-       /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
-       /*
-        * Notify hypervisor to clear CR4.OSXSAVE, do CPUID and save output,
-        * and then restore CR4.  Do this all in assembly to ensure no AVX
-        * instructions are executed while OSXSAVE=0.
-        */
-       asm volatile (
-               "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t"
-               "cpuid\n\t"
-               "mov %%rdi, %%cr4\n\t"
-               : "+a" (regs[KVM_CPUID_EAX]),
-                 "=b" (regs[KVM_CPUID_EBX]),
-                 "+c" (regs[KVM_CPUID_ECX]),
-                 "=d" (regs[KVM_CPUID_EDX])
-               : "D" (get_cr4())
-       );
-
-       /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */
-       GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit)));
-
-       /* Verify restoring CR4 also restored OSXSAVE in CPUID. */
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_sregs sregs;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT &&
-                   vcpu->run->io.direction == KVM_EXIT_IO_OUT) {
-                       /* emulate hypervisor clearing CR4.OSXSAVE */
-                       vcpu_sregs_get(vcpu, &sregs);
-                       sregs.cr4 &= ~X86_CR4_OSXSAVE;
-                       vcpu_sregs_set(vcpu, &sregs);
-                       continue;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
deleted file mode 100644 (file)
index 2d814c1..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM guest debug register tests
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-#define DR6_BD         (1 << 13)
-#define DR7_GD         (1 << 13)
-
-#define IRQ_VECTOR 0xAA
-
-/* For testing data access debug BP */
-uint32_t guest_value;
-
-extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
-
-static void guest_code(void)
-{
-       /* Create a pending interrupt on current vCPU */
-       x2apic_enable();
-       x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
-                        APIC_DM_FIXED | IRQ_VECTOR);
-
-       /*
-        * Software BP tests.
-        *
-        * NOTE: sw_bp needs to be before the cmd here, because int3 is an
-        * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
-        * capture it using the vcpu exception bitmap).
-        */
-       asm volatile("sw_bp: int3");
-
-       /* Hardware instruction BP test */
-       asm volatile("hw_bp: nop");
-
-       /* Hardware data BP test */
-       asm volatile("mov $1234,%%rax;\n\t"
-                    "mov %%rax,%0;\n\t write_data:"
-                    : "=m" (guest_value) : : "rax");
-
-       /*
-        * Single step test, covers 2 basic instructions and 2 emulated
-        *
-        * Enable interrupts during the single stepping to see that pending
-        * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ.
-        *
-        * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler
-        * exits to userspace due to single-step being enabled.
-        */
-       asm volatile("ss_start: "
-                    "sti\n\t"
-                    "xor %%eax,%%eax\n\t"
-                    "cpuid\n\t"
-                    "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t"
-                    "wrmsr\n\t"
-                    "cli\n\t"
-                    : : : "eax", "ebx", "ecx", "edx");
-
-       /* DR6.BD test */
-       asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
-       GUEST_DONE();
-}
-
-#define  CAST_TO_RIP(v)  ((unsigned long long)&(v))
-
-static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
-{
-       struct kvm_regs regs;
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip += insn_len;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-int main(void)
-{
-       struct kvm_guest_debug debug;
-       unsigned long long target_dr6, target_rip;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       uint64_t cmd;
-       int i;
-       /* Instruction lengths starting at ss_start */
-       int ss_size[6] = {
-               1,              /* sti */
-               2,              /* xor */
-               2,              /* cpuid */
-               5,              /* mov */
-               2,              /* wrmsr */
-               1,              /* cli */
-       };
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       /* Test software BPs - int3 */
-       memset(&debug, 0, sizeof(debug));
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
-       vcpu_guest_debug_set(vcpu, &debug);
-       vcpu_run(vcpu);
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                   run->debug.arch.exception == BP_VECTOR &&
-                   run->debug.arch.pc == CAST_TO_RIP(sw_bp),
-                   "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
-                   run->exit_reason, run->debug.arch.exception,
-                   run->debug.arch.pc, CAST_TO_RIP(sw_bp));
-       vcpu_skip_insn(vcpu, 1);
-
-       /* Test instruction HW BP over DR[0-3] */
-       for (i = 0; i < 4; i++) {
-               memset(&debug, 0, sizeof(debug));
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-               debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
-               debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               target_dr6 = 0xffff0ff0 | (1UL << i);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(hw_bp),
-                           run->debug.arch.dr6, target_dr6);
-       }
-       /* Skip "nop" */
-       vcpu_skip_insn(vcpu, 1);
-
-       /* Test data access HW BP over DR[0-3] */
-       for (i = 0; i < 4; i++) {
-               memset(&debug, 0, sizeof(debug));
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-               debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
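-               /*
-                * DR7: global-enable DR[i], with R/W=01 (break on data writes)
-                * and LEN=11 (4 bytes) in the per-breakpoint control nibble.
-                */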
-               debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
-                   (0x000d0000UL << (4*i));
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               target_dr6 = 0xffff0ff0 | (1UL << i);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == CAST_TO_RIP(write_data) &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(write_data),
-                           run->debug.arch.dr6, target_dr6);
-               /* Rollback the 4-bytes "mov" */
-               vcpu_skip_insn(vcpu, -7);
-       }
-       /* Skip the 4-bytes "mov" */
-       vcpu_skip_insn(vcpu, 7);
-
-       /* Test single step */
-       target_rip = CAST_TO_RIP(ss_start);
-       target_dr6 = 0xffff4ff0ULL;
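-       /* Each step is expected to set DR6.BS (bit 14) on top of the reserved bits */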
-       for (i = 0; i < ARRAY_SIZE(ss_size); i++) {
-               target_rip += ss_size[i];
-               memset(&debug, 0, sizeof(debug));
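-               /* BLOCKIRQ suppresses interrupt injection while stepping so the trap lands on the next guest instruction */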
-               debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
-                               KVM_GUESTDBG_BLOCKIRQ;
-               debug.arch.debugreg[7] = 0x00000400;
-               vcpu_guest_debug_set(vcpu, &debug);
-               vcpu_run(vcpu);
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                           run->debug.arch.exception == DB_VECTOR &&
-                           run->debug.arch.pc == target_rip &&
-                           run->debug.arch.dr6 == target_dr6,
-                           "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           i, run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, target_rip, run->debug.arch.dr6,
-                           target_dr6);
-       }
-
-       /* Finally test global disable */
-       memset(&debug, 0, sizeof(debug));
-       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
-       debug.arch.debugreg[7] = 0x400 | DR7_GD;
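-       /*
-        * With DR7.GD set, the guest's "mov %dr0, %rax" at bd_start triggers
-        * #DB with DR6.BD before the debug register is actually accessed.
-        */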
-       vcpu_guest_debug_set(vcpu, &debug);
-       vcpu_run(vcpu);
-       target_dr6 = 0xffff0ff0 | DR6_BD;
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
-                   run->debug.arch.exception == DB_VECTOR &&
-                   run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
-                   run->debug.arch.dr6 == target_dr6,
-                           "DR7.GD: exit %d exception %d rip 0x%llx "
-                           "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
-                           run->exit_reason, run->debug.arch.exception,
-                           run->debug.arch.pc, CAST_TO_RIP(bd_start), run->debug.arch.dr6,
-                           target_dr6);
-
-       /* Disable all debug controls, run to the end */
-       memset(&debug, 0, sizeof(debug));
-       vcpu_guest_debug_set(vcpu, &debug);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       cmd = get_ucall(vcpu, &uc);
-       TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
deleted file mode 100644 (file)
index 2929c06..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty logging page splitting test
- *
- * Based on dirty_log_perf.c
- *
- * Copyright (C) 2018, Red Hat, Inc.
- * Copyright (C) 2023, Google, Inc.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <linux/bitmap.h>
-
-#include "kvm_util.h"
-#include "test_util.h"
-#include "memstress.h"
-#include "guest_modes.h"
-#include "ucall_common.h"
-
-#define VCPUS          2
-#define SLOTS          2
-#define ITERATIONS     2
-
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-
-static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
-
-static u64 dirty_log_manual_caps;
-static bool host_quit;
-static int iteration;
-static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
-
-struct kvm_page_stats {
-       uint64_t pages_4k;
-       uint64_t pages_2m;
-       uint64_t pages_1g;
-       uint64_t hugepages;
-};
-
-static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
-{
-       stats->pages_4k = vm_get_stat(vm, "pages_4k");
-       stats->pages_2m = vm_get_stat(vm, "pages_2m");
-       stats->pages_1g = vm_get_stat(vm, "pages_1g");
-       stats->hugepages = stats->pages_2m + stats->pages_1g;
-
-       pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
-                stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
-                stats->hugepages);
-}
-
-static void run_vcpu_iteration(struct kvm_vm *vm)
-{
-       int i;
-
-       iteration++;
-       for (i = 0; i < VCPUS; i++) {
-               while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
-                      iteration)
-                       ;
-       }
-}
-
-static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
-{
-       struct kvm_vcpu *vcpu = vcpu_args->vcpu;
-       int vcpu_idx = vcpu_args->vcpu_idx;
-
-       while (!READ_ONCE(host_quit)) {
-               int current_iteration = READ_ONCE(iteration);
-
-               vcpu_run(vcpu);
-
-               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
-
-               vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
-
-               /* Wait for the start of the next iteration to be signaled. */
-               while (current_iteration == READ_ONCE(iteration) &&
-                      READ_ONCE(iteration) >= 0 &&
-                      !READ_ONCE(host_quit))
-                       ;
-       }
-}
-
-static void run_test(enum vm_guest_mode mode, void *unused)
-{
-       struct kvm_vm *vm;
-       unsigned long **bitmaps;
-       uint64_t guest_num_pages;
-       uint64_t host_num_pages;
-       uint64_t pages_per_slot;
-       int i;
-       struct kvm_page_stats stats_populated;
-       struct kvm_page_stats stats_dirty_logging_enabled;
-       struct kvm_page_stats stats_dirty_pass[ITERATIONS];
-       struct kvm_page_stats stats_clear_pass[ITERATIONS];
-       struct kvm_page_stats stats_dirty_logging_disabled;
-       struct kvm_page_stats stats_repopulated;
-
-       vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
-                                SLOTS, backing_src, false);
-
-       guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
-       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-       host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-       pages_per_slot = host_num_pages / SLOTS;
-       TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
-       TEST_ASSERT(!(host_num_pages % 512),
-                   "Number of pages '%lu' is not a multiple of 2MiB", host_num_pages);
-
-       bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
-
-       if (dirty_log_manual_caps)
-               vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
-                             dirty_log_manual_caps);
-
-       /* Start the iterations */
-       iteration = -1;
-       host_quit = false;
-
-       for (i = 0; i < VCPUS; i++)
-               vcpu_last_completed_iteration[i] = -1;
-
-       memstress_start_vcpu_threads(VCPUS, vcpu_worker);
-
-       run_vcpu_iteration(vm);
-       get_page_stats(vm, &stats_populated, "populating memory");
-
-       /* Enable dirty logging */
-       memstress_enable_dirty_logging(vm, SLOTS);
-
-       get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
-
-       while (iteration < ITERATIONS) {
-               run_vcpu_iteration(vm);
-               get_page_stats(vm, &stats_dirty_pass[iteration - 1],
-                              "dirtying memory");
-
-               memstress_get_dirty_log(vm, bitmaps, SLOTS);
-
-               if (dirty_log_manual_caps) {
-                       memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
-
-                       get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
-               }
-       }
-
-       /* Disable dirty logging */
-       memstress_disable_dirty_logging(vm, SLOTS);
-
-       get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
-
-       /* Run vCPUs again to fault pages back in. */
-       run_vcpu_iteration(vm);
-       get_page_stats(vm, &stats_repopulated, "repopulating memory");
-
-       /*
-        * Tell the vCPU threads to quit.  No need to manually check that vCPUs
-        * have stopped running after disabling dirty logging, the join will
-        * wait for them to exit.
-        */
-       host_quit = true;
-       memstress_join_vcpu_threads(VCPUS);
-
-       memstress_free_bitmaps(bitmaps, SLOTS);
-       memstress_destroy_vm(vm);
-
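-       /*
-        * Sanity check that populating memory mapped everything with
-        * hugepages: 512 4KiB pages per 2MiB page, 512 * 512 per 1GiB page.
-        */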
-       TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
-                       stats_populated.pages_1g * 512 * 512), host_num_pages);
-
-       /*
-        * Check that all huge pages were split. Since large pages can only
-        * exist in the data slot, and the vCPUs should have dirtied all pages
-        * in the data slot, there should be no huge pages left after splitting.
-        * Splitting happens at dirty log enable time without
-        * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
-        * with that capability.
-        */
-       if (dirty_log_manual_caps) {
-               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
-                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
-                           host_num_pages, stats_clear_pass[0].pages_4k);
-               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
-       } else {
-               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
-                           "Expected at least '%lu' 4KiB pages, found only '%lu'",
-                           host_num_pages, stats_dirty_logging_enabled.pages_4k);
-       }
-
-       /*
-        * Once dirty logging is disabled and the vCPUs have touched all their
-        * memory again, the hugepage counts should be the same as they were
-        * right after initial population of memory.
-        */
-       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
-       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
-              name);
-       puts("");
-       printf(" -b: specify the size of the memory region which should be\n"
-              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
-              "     (default: 1G)\n");
-       backing_src_help("-s");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       int opt;
-
-       TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
-       TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
-
-       while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
-               switch (opt) {
-               case 'b':
-                       guest_percpu_mem_size = parse_size(optarg);
-                       break;
-               case 'h':
-                       help(argv[0]);
-                       exit(0);
-               case 's':
-                       backing_src = parse_backing_src_type(optarg);
-                       break;
-               default:
-                       help(argv[0]);
-                       exit(1);
-               }
-       }
-
-       if (!is_backing_src_hugetlb(backing_src)) {
-               pr_info("This test will only work reliably with HugeTLB memory. "
-                       "It can work with THP, but that is best effort.\n");
-       }
-
-       guest_modes_append_default();
-
-       dirty_log_manual_caps = 0;
-       for_each_guest_mode(run_test, NULL);
-
-       dirty_log_manual_caps =
-               kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-
-       if (dirty_log_manual_caps) {
-               dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
-                                         KVM_DIRTY_LOG_INITIALLY_SET);
-               for_each_guest_mode(run_test, NULL);
-       } else {
-               pr_info("Skipping testing with MANUAL_PROTECT as it is not supported\n");
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
deleted file mode 100644 (file)
index 8105547..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- *
- * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
- */
-#include "flds_emulation.h"
-#include "test_util.h"
-#include "ucall_common.h"
-
-#define MMIO_GPA       0x700000000
-#define MMIO_GVA       MMIO_GPA
-
-static void guest_code(void)
-{
-       /* Execute flds with an MMIO address to force KVM to emulate it. */
-       flds(MMIO_GVA);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
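-       /*
-        * Map the GVA to a GPA with no backing memslot so that the flds load
-        * is treated as MMIO and must be emulated (which KVM cannot do).
-        */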
-       virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
-
-       vcpu_run(vcpu);
-       handle_flds_emulation_failure_exit(vcpu);
-       vcpu_run(vcpu);
-       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
deleted file mode 100644 (file)
index a72f13a..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-static bool is_kvm_controlled_msr(uint32_t msr)
-{
-       return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
-}
-
-/*
- * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
- * MSR, and doesn't allow setting the hidden version.
- */
-static bool is_hidden_vmx_msr(uint32_t msr)
-{
-       switch (msr) {
-       case MSR_IA32_VMX_PINBASED_CTLS:
-       case MSR_IA32_VMX_PROCBASED_CTLS:
-       case MSR_IA32_VMX_EXIT_CTLS:
-       case MSR_IA32_VMX_ENTRY_CTLS:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool is_quirked_msr(uint32_t msr)
-{
-       return msr != MSR_AMD64_DE_CFG;
-}
-
-static void test_feature_msr(uint32_t msr)
-{
-       const uint64_t supported_mask = kvm_get_feature_msr(msr);
-       uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /*
-        * Don't bother testing KVM-controlled MSRs beyond verifying that the
-        * MSR can be read from userspace.  Any value is effectively legal, as
-        * KVM is bound by x86 architecture, not by ABI.
-        */
-       if (is_kvm_controlled_msr(msr))
-               return;
-
-       /*
-        * More goofy behavior.  KVM reports the host CPU's actual revision ID,
-        * but initializes the vCPU's revision ID to an arbitrary value.
-        */
-       if (msr == MSR_IA32_UCODE_REV)
-               reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
-
-       /*
-        * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
-        * full set of features supported by KVM.  For non-quirked MSRs, and
-        * when the quirk is disabled, KVM must zero-initialize the MSR and let
-        * userspace do the configuration.
-        */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
-                   "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
-                   reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
-                   vcpu_get_msr(vcpu, msr));
-       if (!is_hidden_vmx_msr(msr))
-               vcpu_set_msr(vcpu, msr, supported_mask);
-       kvm_vm_free(vm);
-
-       if (is_hidden_vmx_msr(msr))
-               return;
-
-       if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
-           !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
-               return;
-
-       vm = vm_create(1);
-       vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
-
-       vcpu = vm_vcpu_add(vm, 0, NULL);
-       TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
-                   "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
-                   msr, vcpu_get_msr(vcpu, msr));
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       const struct kvm_msr_list *feature_list;
-       int i;
-
-       /*
-        * Skip the entire test if MSR_FEATURES isn't supported, other tests
-        * will cover the "regular" list of MSRs, the coverage here is purely
-        * opportunistic and not interesting on its own.
-        */
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
-
-       (void)kvm_get_msr_index_list();
-
-       feature_list = kvm_get_feature_msr_index_list();
-       for (i = 0; i < feature_list->nmsrs; i++)
-               test_feature_msr(feature_list->indices[i]);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
deleted file mode 100644 (file)
index 762628f..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <linux/stringify.h>
-#include <stdint.h>
-
-#include "kvm_test_harness.h"
-#include "apic.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-/* VMCALL and VMMCALL are both 3-byte opcodes. */
-#define HYPERCALL_INSN_SIZE    3
-
-static bool quirk_disabled;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       regs->rax = -EFAULT;
-       regs->rip += HYPERCALL_INSN_SIZE;
-}
-
-static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
-static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
-
-extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t do_sched_yield(uint8_t apic_id)
-{
-       uint64_t ret;
-
-       asm volatile("hypercall_insn:\n\t"
-                    ".byte 0xcc,0xcc,0xcc\n\t"
-                    : "=a"(ret)
-                    : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
-                    : "memory");
-
-       return ret;
-}
-
-static void guest_main(void)
-{
-       const uint8_t *native_hypercall_insn;
-       const uint8_t *other_hypercall_insn;
-       uint64_t ret;
-
-       if (host_cpu_is_intel) {
-               native_hypercall_insn = vmx_vmcall;
-               other_hypercall_insn  = svm_vmmcall;
-       } else if (host_cpu_is_amd) {
-               native_hypercall_insn = svm_vmmcall;
-               other_hypercall_insn  = vmx_vmcall;
-       } else {
-               GUEST_ASSERT(0);
-               /* unreachable */
-               return;
-       }
-
-       memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
-
-       ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
-
-       /*
-        * If the quirk is disabled, verify that guest_ud_handler() "returned"
-        * -EFAULT and that KVM did NOT patch the hypercall.  If the quirk is
-        * enabled, verify that the hypercall succeeded and that KVM patched in
-        * the "right" hypercall.
-        */
-       if (quirk_disabled) {
-               GUEST_ASSERT(ret == (uint64_t)-EFAULT);
-               GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
-                            HYPERCALL_INSN_SIZE));
-       } else {
-               GUEST_ASSERT(!ret);
-               GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
-                            HYPERCALL_INSN_SIZE));
-       }
-
-       GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
-               break;
-       case UCALL_DONE:
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
-                         uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
-       }
-}
-
-static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
-{
-       struct kvm_vm *vm = vcpu->vm;
-
-       vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
-
-       if (disable_quirk)
-               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
-                             KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
-       quirk_disabled = disable_quirk;
-       sync_global_to_guest(vm, quirk_disabled);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       enter_guest(vcpu);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
-{
-       test_fix_hypercall(vcpu, false);
-}
-
-KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
-{
-       test_fix_hypercall(vcpu, true);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
deleted file mode 100644 (file)
index 37b1a9f..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef SELFTEST_KVM_FLDS_EMULATION_H
-#define SELFTEST_KVM_FLDS_EMULATION_H
-
-#include "kvm_util.h"
-
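-/* Opcode 0xd9 /0 with a ModRM byte of 0x00 encodes 'flds [eax]' */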
-#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
-
-/*
- * flds is an instruction that the KVM instruction emulator is known not to
- * support. This can be used in guest code along with a mechanism to force
- * KVM to emulate the instruction (e.g. by providing an MMIO address) to
- * exercise emulation failures.
- */
-static inline void flds(uint64_t address)
-{
-       __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
-}
-
-static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-       uint8_t *insn_bytes;
-       uint64_t flags;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-
-       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-                   "Unexpected suberror: %u",
-                   run->emulation_failure.suberror);
-
-       flags = run->emulation_failure.flags;
-       TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
-                   flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
-                   "run->emulation_failure is missing instruction bytes");
-
-       TEST_ASSERT(run->emulation_failure.insn_size >= 2,
-                   "Expected a 2-byte opcode for 'flds', got %d bytes",
-                   run->emulation_failure.insn_size);
-
-       insn_bytes = run->emulation_failure.insn_bytes;
-       TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
-                   "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
-                   insn_bytes[0], insn_bytes[1]);
-
-       vcpu_regs_get(vcpu, &regs);
-       regs.rip += 2;
-       vcpu_regs_set(vcpu, &regs);
-}
-
-#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
deleted file mode 100644 (file)
index 10b1b0b..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
-{
-       const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
-       const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
-       const uint64_t legal = ignored | valid;
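-       /*
-        * Expected behavior: "ignored" bits can be written but read back as
-        * zero, "valid" bits are writable and preserved, and writes that set
-        * any other bit must fail.
-        */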
-       uint64_t val = BIT_ULL(bit);
-       uint64_t actual;
-       int r;
-
-       r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
-       TEST_ASSERT(val & ~legal ? !r : r == 1,
-                   "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
-                   val, val & ~legal ? "fail" : "succeed");
-
-       actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
-       TEST_ASSERT(actual == (val & valid),
-                   "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
-                   bit, actual, (val & valid));
-
-       vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       unsigned int bit;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       for (bit = 0; bit < BITS_PER_LONG; bit++)
-               test_hwcr_bit(vcpu, bit);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
deleted file mode 100644 (file)
index e058bc6..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V clocksources
- */
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-struct ms_hyperv_tsc_page {
-       volatile u32 tsc_sequence;
-       u32 reserved1;
-       volatile u64 tsc_scale;
-       volatile s64 tsc_offset;
-} __packed;
-
-/* Simplified mul_u64_u64_shr() */
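-/* Returns the high 64 bits of a * b, ignoring carries from the dropped low-order partial products */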
-static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
-{
-       union {
-               u64 ll;
-               struct {
-                       u32 low, high;
-               } l;
-       } rm, rn, rh, a0, b0;
-       u64 c;
-
-       a0.ll = a;
-       b0.ll = b;
-
-       rm.ll = (u64)a0.l.low * b0.l.high;
-       rn.ll = (u64)a0.l.high * b0.l.low;
-       rh.ll = (u64)a0.l.high * b0.l.high;
-
-       rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
-       rh.l.high = (c >> 32) + rh.l.high;
-
-       return rh.ll;
-}
-
-static inline void nop_loop(void)
-{
-       int i;
-
-       for (i = 0; i < 100000000; i++)
-               asm volatile("nop");
-}
-
-static inline void check_tsc_msr_rdtsc(void)
-{
-       u64 tsc_freq, r1, r2, t1, t2;
-       s64 delta_ns;
-
-       tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
-       GUEST_ASSERT(tsc_freq > 0);
-
-       /* For increased accuracy, take the mean of rdtsc() before and after rdmsr() */
-       r1 = rdtsc();
-       t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       r1 = (r1 + rdtsc()) / 2;
-       nop_loop();
-       r2 = rdtsc();
-       t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       r2 = (r2 + rdtsc()) / 2;
-
-       GUEST_ASSERT(r2 > r1 && t2 > t1);
-
-       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
-       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
-       if (delta_ns < 0)
-               delta_ns = -delta_ns;
-
-       /* 1% tolerance */
-       GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
-}
-
-static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
-{
-       return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
-}
-
-static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
-{
-       u64 r1, r2, t1, t2;
-
-       /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
-       t1 = get_tscpage_ts(tsc_page);
-       r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-
-       /* 10 ms tolerance */
-       GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
-       nop_loop();
-
-       t2 = get_tscpage_ts(tsc_page);
-       r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-       GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
-}
-
-static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
-{
-       u64 tsc_scale, tsc_offset;
-
-       /* Set Guest OS id to enable Hyper-V emulation */
-       GUEST_SYNC(1);
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       GUEST_SYNC(2);
-
-       check_tsc_msr_rdtsc();
-
-       GUEST_SYNC(3);
-
-       /* Set up the TSC page in disabled state, check that it's clean */
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
-       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
-       GUEST_ASSERT(tsc_page->tsc_scale == 0);
-       GUEST_ASSERT(tsc_page->tsc_offset == 0);
-
-       GUEST_SYNC(4);
-
-       /* Set up the TSC page in enabled state */
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
-       GUEST_ASSERT(tsc_page->tsc_sequence != 0);
-
-       GUEST_SYNC(5);
-
-       check_tsc_msr_tsc_page(tsc_page);
-
-       GUEST_SYNC(6);
-
-       tsc_offset = tsc_page->tsc_offset;
-       /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
-
-       GUEST_SYNC(7);
-       /* Sanity check TSC page timestamp, it should be close to 0 */
-       GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
-
-       GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
-
-       nop_loop();
-
-       /*
-        * Enable Re-enlightenment and check that TSC page stays constant across
-        * KVM_SET_CLOCK.
-        */
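-       /* Reenlightenment control: enable (bit 16) with notification vector 0xff */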
-       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
-       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
-       tsc_offset = tsc_page->tsc_offset;
-       tsc_scale = tsc_page->tsc_scale;
-       GUEST_SYNC(8);
-       GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
-       GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
-
-       GUEST_SYNC(9);
-
-       check_tsc_msr_tsc_page(tsc_page);
-
-       /*
-        * Disable re-enlightenment and TSC page, check that KVM doesn't update
-        * it anymore.
-        */
-       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
-       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
-       wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
-       memset(tsc_page, 0, sizeof(*tsc_page));
-
-       GUEST_SYNC(10);
-       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
-       GUEST_ASSERT(tsc_page->tsc_offset == 0);
-       GUEST_ASSERT(tsc_page->tsc_scale == 0);
-
-       GUEST_DONE();
-}
-
-static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
-{
-       u64 tsc_freq, r1, r2, t1, t2;
-       s64 delta_ns;
-
-       tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
-       TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
-
-       /* For increased accuracy, take the mean of rdtsc() before and after the ioctl */
-       r1 = rdtsc();
-       t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
-       r1 = (r1 + rdtsc()) / 2;
-       nop_loop();
-       r2 = rdtsc();
-       t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
-       r2 = (r2 + rdtsc()) / 2;
-
-       TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
-
-       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
-       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
-       if (delta_ns < 0)
-               delta_ns = -delta_ns;
-
-       /* 1% tolerance */
-       TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
-                   "Elapsed time does not match (MSR=%ld, TSC=%ld)",
-                   (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       vm_vaddr_t tsc_page_gva;
-       int stage;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       vcpu_set_hv_cpuid(vcpu);
-
-       tsc_page_gva = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
-       TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
-               "TSC page has to be page aligned");
-       vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
-
-       host_check_tsc_msr_rdtsc(vcpu);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       /* Keep in sync with guest_main() */
-                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
-                                   stage);
-                       goto out;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage,
-                           "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               /* Reset kvmclock triggering TSC page update */
-               if (stage == 7 || stage == 8 || stage == 10) {
-                       struct kvm_clock_data clock = {0};
-
-                       vm_ioctl(vm, KVM_SET_CLOCK, &clock);
-               }
-       }
-
-out:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
deleted file mode 100644 (file)
index 4f5881d..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_HYPERV_CPUID
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-static void guest_code(void)
-{
-}
-
-static bool smt_possible(void)
-{
-       char buf[16];
-       FILE *f;
-       bool res = true;
-
-       f = fopen("/sys/devices/system/cpu/smt/control", "r");
-       if (f) {
-               if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
-                       if (!strncmp(buf, "forceoff", 8) ||
-                           !strncmp(buf, "notsupported", 12))
-                               res = false;
-               }
-               fclose(f);
-       }
-
-       return res;
-}
-
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
-                         bool evmcs_expected)
-{
-       int i;
-       int nent_expected = 10;
-       u32 test_val;
-
-       TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
-                   "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
-                   " (returned %d)",
-                   nent_expected, hv_cpuid_entries->nent);
-
-       for (i = 0; i < hv_cpuid_entries->nent; i++) {
-               const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
-
-               TEST_ASSERT((entry->function >= 0x40000000) &&
-                           (entry->function <= 0x40000082),
-                           "function %x is out of the supported range",
-                           entry->function);
-
-               TEST_ASSERT(entry->index == 0,
-                           ".index field should be zero");
-
-               TEST_ASSERT(entry->flags == 0,
-                           ".flags field should be zero");
-
-               TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
-                           !entry->padding[2], "padding should be zero");
-
-               switch (entry->function) {
-               case 0x40000000:
-                       test_val = 0x40000082;
-
-                       TEST_ASSERT(entry->eax == test_val,
-                                   "Wrong max leaf report in 0x40000000.EAX: %x"
-                                   " (evmcs=%d)",
-                                   entry->eax, evmcs_expected
-                               );
-                       break;
-               case 0x40000004:
-                       test_val = entry->eax & (1UL << 18);
-
-                       TEST_ASSERT(!!test_val == !smt_possible(),
-                                   "NoNonArchitecturalCoreSharing bit"
-                                   " doesn't reflect SMT setting");
-                       break;
-               case 0x4000000A:
-                       TEST_ASSERT(entry->eax & (1UL << 19),
-                                   "Enlightened MSR-Bitmap should always be supported"
-                                   " 0x4000000A.EAX: %x", entry->eax);
-                       if (evmcs_expected)
-                               TEST_ASSERT((entry->eax & 0xffff) == 0x101,
-                                   "Supported Enlightened VMCS version range is supposed to be 1:1"
-                                   " 0x4000000A.EAX: %x", entry->eax);
-
-                       break;
-               default:
-                       break;
-
-               }
-               /*
-                * If needed for debug:
-                * fprintf(stdout,
-                *      "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n",
-                *      entry->function, entry->eax, entry->ebx, entry->ecx,
-                *      entry->edx);
-                */
-       }
-}
-
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
-{
-       static struct kvm_cpuid2 cpuid = {.nent = 0};
-       int ret;
-
-       if (vcpu)
-               ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-       else
-               ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
-
-       TEST_ASSERT(ret == -1 && errno == E2BIG,
-                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-                   " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       const struct kvm_cpuid2 *hv_cpuid_entries;
-       struct kvm_vcpu *vcpu;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Test vCPU ioctl version */
-       test_hv_cpuid_e2big(vm, vcpu);
-
-       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-       test_hv_cpuid(hv_cpuid_entries, false);
-       free((void *)hv_cpuid_entries);
-
-       if (!kvm_cpu_has(X86_FEATURE_VMX) ||
-           !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-               print_skip("Enlightened VMCS is unsupported");
-               goto do_sys;
-       }
-       vcpu_enable_evmcs(vcpu);
-       hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-       test_hv_cpuid(hv_cpuid_entries, true);
-       free((void *)hv_cpuid_entries);
-
-do_sys:
-       /* Test system ioctl version */
-       if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
-               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
-               goto out;
-       }
-
-       test_hv_cpuid_e2big(vm, NULL);
-
-       hv_cpuid_entries = kvm_get_supported_hv_cpuid();
-       test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
-
-out:
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
deleted file mode 100644 (file)
index 74cf196..0000000
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "hyperv.h"
-#include "vmx.h"
-
-static int ud_count;
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       ud_count++;
-       regs->rip += 3; /* VMLAUNCH */
-}
-
-static void guest_nmi_handler(struct ex_regs *regs)
-{
-}
-
-static inline void rdmsr_from_l2(uint32_t msr)
-{
-       /* Currently, L1 doesn't preserve GPRs during vmexits. */
-       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
-                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                             "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-/* Exit to L1 from L2 with RDMSR instruction */
-void l2_guest_code(void)
-{
-       u64 unused;
-
-       GUEST_SYNC(7);
-
-       GUEST_SYNC(8);
-
-       /* Forced exit to L1 upon restore */
-       GUEST_SYNC(9);
-
-       vmcall();
-
-       /* MSR-Bitmap tests */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
-       vmcall();
-       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
-       /* L2 TLB flush tests */
-       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
-                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
-       rdmsr_from_l2(MSR_FS_BASE);
-       /*
-        * Note: hypercall status (RAX) is not preserved correctly by L1 after
-        * synthetic vmexit, use unchecked version.
-        */
-       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
-                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
-                          &unused);
-
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
-               vm_vaddr_t hv_hcall_page_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
-
-       x2apic_enable();
-
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
-
-       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
-       evmcs_enable();
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_SYNC(3);
-       GUEST_ASSERT(load_evmcs(hv_pages));
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       GUEST_SYNC(4);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(5);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
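-       /* VMLAUNCH must fail while the eVMCS carries a bogus revision ID */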
-       current_evmcs->revision_id = -1u;
-       GUEST_ASSERT(vmlaunch());
-       current_evmcs->revision_id = EVMCS_VERSION;
-       GUEST_SYNC(6);
-
-       vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
-               PIN_BASED_NMI_EXITING);
-
-       /* L2 TLB flush setup */
-       current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
-       current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
-       current_evmcs->hv_vm_id = 1;
-       current_evmcs->hv_vp_id = 1;
-       current_vp_assist->nested_control.features.directhypercall = 1;
-       *(u32 *)(hv_pages->partition_assist) = 0;
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
-       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
-       GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
-
-       /*
-        * NMI forces an L2->L1 exit; resume L2 in the hope that the EVMCS is
-        * up-to-date (RIP points where it should and not at the beginning
-        * of l2_guest_code()).  GUEST_SYNC(9) checks that.
-        */
-       GUEST_ASSERT(!vmresume());
-
-       GUEST_SYNC(10);
-
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       current_evmcs->guest_rip += 3; /* vmcall */
-
-       /* Intercept RDMSR 0xc0000100 */
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
-               CPU_BASED_USE_MSR_BITMAPS);
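-       /* Byte offset 0x400 in the MSR bitmap is the read bitmap for MSRs 0xc0000000 - 0xc0001fff */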
-       __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /* Enable enlightened MSR bitmap */
-       current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
-       __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
-       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
-       current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-       GUEST_ASSERT(!vmresume());
-       /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       current_evmcs->guest_rip += 3; /* vmcall */
-
-       /* Now tell KVM we've changed MSR-Bitmap */
-       current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-
-       /*
-        * L2 TLB flush test. First VMCALL should be handled directly by L0,
-        * no VMCALL exit expected.
-        */
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
-       current_evmcs->guest_rip += 2; /* rdmsr */
-       /* Enable synthetic vmexit */
-       *(u32 *)(hv_pages->partition_assist) = 1;
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_SYNC(11);
-
-       /* Try enlightened vmptrld with an incorrect GPA */
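-       /* The resulting VMLAUNCH should #UD; guest_ud_handler() counts and skips it */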
-       evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(ud_count == 1);
-       GUEST_DONE();
-}
-
-void inject_nmi(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       events.nmi.pending = 1;
-       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
-
-       vcpu_events_set(vcpu, &events);
-}
-
-static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
-                                       struct kvm_vcpu *vcpu)
-{
-       struct kvm_regs regs1, regs2;
-       struct kvm_x86_state *state;
-
-       state = vcpu_save_state(vcpu);
-       memset(&regs1, 0, sizeof(regs1));
-       vcpu_regs_get(vcpu, &regs1);
-
-       kvm_vm_release(vm);
-
-       /* Restore state in a new VM.  */
-       vcpu = vm_recreate_with_one_vcpu(vm);
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_enable_evmcs(vcpu);
-       vcpu_load_state(vcpu, state);
-       kvm_x86_state_cleanup(state);
-
-       memset(&regs2, 0, sizeof(regs2));
-       vcpu_regs_get(vcpu, &regs2);
-       TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                   "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                   (ulong) regs2.rdi, (ulong) regs2.rsi);
-       return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
-       vm_vaddr_t hcall_page;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
-       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       hcall_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
-
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_enable_evmcs(vcpu);
-
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-       vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
-
-       pr_info("Running L1 which uses EVMCS to run L2\n");
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               vcpu = save_restore_vm(vm, vcpu);
-
-               /* Force immediate L2->L1 exit before resuming */
-               if (stage == 8) {
-                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
-                       inject_nmi(vcpu);
-               }
-
-               /*
-                * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
-                * restored VM (before the first KVM_RUN) to check that
-                * KVM_STATE_NESTED_EVMCS is not lost.
-                */
-               if (stage == 9) {
-                       pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
-                       vcpu = save_restore_vm(vm, vcpu);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
deleted file mode 100644 (file)
index 949e08e..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
- * exit to userspace and receive result in guest.
- *
- * Negative tests are present in hyperv_features.c
- *
- * Copyright 2022 Google LLC
- * Author: Vipin Sharma <vipinsh@google.com>
- */
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/* Any value is fine */
-#define EXT_CAPABILITIES 0xbull
-
-static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
-                      vm_vaddr_t out_pg_gva)
-{
-       uint64_t *output_gva;
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
-
-       output_gva = (uint64_t *)out_pg_gva;
-
-       hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
-
-       /* TLFS states output will be a uint64_t value */
-       GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
-
-       GUEST_DONE();
-}
-
-int main(void)
-{
-       vm_vaddr_t hcall_out_page;
-       vm_vaddr_t hcall_in_page;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       uint64_t *outval;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
-
-       /* Verify if extended hypercalls are supported */
-       if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
-                          HV_ENABLE_EXTENDED_HYPERCALLS)) {
-               print_skip("Extended calls not supported by the kernel");
-               exit(KSFT_SKIP);
-       }
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-       vcpu_set_hv_cpuid(vcpu);
-
-       /* Hypercall input */
-       hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
-
-       /* Hypercall output */
-       hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
-
-       vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
-                     addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
-                   "Unexpected exit reason: %u (%s)",
-                   run->exit_reason, exit_reason_str(run->exit_reason));
-
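-       /* Complete the hypercall from userspace: fill the output page and report success */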
-       outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
-       *outval = EXT_CAPABILITIES;
-       run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                   "Unexpected exit reason: %u (%s)",
-                   run->exit_reason, exit_reason_str(run->exit_reason));
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
deleted file mode 100644 (file)
index 068e9c6..0000000
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Tests for Hyper-V features enablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-/*
- * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
- * but to activate the feature it is sufficient to set it to a non-zero
- * value. Use BIT(0) for that.
- */
-#define HV_PV_SPINLOCKS_TEST            \
-       KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
-
-struct msr_data {
-       uint32_t idx;
-       bool fault_expected;
-       bool write;
-       u64 write_val;
-};
-
-struct hcall_data {
-       uint64_t control;
-       uint64_t expect;
-       bool ud_expected;
-};
-
-static bool is_write_only_msr(uint32_t msr)
-{
-       return msr == HV_X64_MSR_EOI;
-}
-
-static void guest_msr(struct msr_data *msr)
-{
-       uint8_t vector = 0;
-       uint64_t msr_val = 0;
-
-       GUEST_ASSERT(msr->idx);
-
-       if (msr->write)
-               vector = wrmsr_safe(msr->idx, msr->write_val);
-
-       if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
-               vector = rdmsr_safe(msr->idx, &msr_val);
-
-       if (msr->fault_expected)
-               __GUEST_ASSERT(vector == GP_VECTOR,
-                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
-                              msr->write ? "WR" : "RD", msr->idx, vector);
-       else
-               __GUEST_ASSERT(!vector,
-                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
-                              msr->write ? "WR" : "RD", msr->idx, vector);
-
-       if (vector || is_write_only_msr(msr->idx))
-               goto done;
-
-       if (msr->write)
-               __GUEST_ASSERT(!vector,
-                              "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
-                              msr->idx, msr->write_val, msr_val);
-
-       /* Invariant TSC bit appears when TSC invariant control MSR is written to */
-       if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
-               if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
-               else
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
-                                    !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
-       }
-
-done:
-       GUEST_DONE();
-}
-
-static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
-{
-       u64 res, input, output;
-       uint8_t vector;
-
-       GUEST_ASSERT_NE(hcall->control, 0);
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-
-       if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
-               input = pgs_gpa;
-               output = pgs_gpa + 4096;
-       } else {
-               input = output = 0;
-       }
-
-       vector = __hyperv_hypercall(hcall->control, input, output, &res);
-       if (hcall->ud_expected) {
-               __GUEST_ASSERT(vector == UD_VECTOR,
-                              "Expected #UD for control '%lu', got vector '0x%x'",
-                              hcall->control, vector);
-       } else {
-               __GUEST_ASSERT(!vector,
-                              "Expected no exception for control '%lu', got vector '0x%x'",
-                              hcall->control, vector);
-               GUEST_ASSERT_EQ(res, hcall->expect);
-       }
-
-       GUEST_DONE();
-}
-
-static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Enable all supported Hyper-V features, then clear the leaves holding
-        * the features that will be tested one by one.
-        */
-       vcpu_set_hv_cpuid(vcpu);
-
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
-       vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
-}
-
-static void guest_test_msrs_access(void)
-{
-       struct kvm_cpuid2 *prev_cpuid = NULL;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage = 0;
-       vm_vaddr_t msr_gva;
-       struct msr_data *msr;
-       bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
-
-       while (true) {
-               vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
-
-               msr_gva = vm_vaddr_alloc_page(vm);
-               memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
-               msr = addr_gva2hva(vm, msr_gva);
-
-               vcpu_args_set(vcpu, 1, msr_gva);
-               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
-               if (!prev_cpuid) {
-                       vcpu_reset_hv_cpuid(vcpu);
-
-                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
-               } else {
-                       vcpu_init_cpuid(vcpu, prev_cpuid);
-               }
-
-               /* TODO: Make this entire test easier to maintain. */
-               if (stage >= 21)
-                       vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
-
-               switch (stage) {
-               case 0:
-                       /*
-                        * Only available when Hyper-V identification is set
-                        */
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 1:
-                       msr->idx = HV_X64_MSR_HYPERCALL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 2:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
-                       /*
-                        * HV_X64_MSR_GUEST_OS_ID has to be written first to make
-                        * HV_X64_MSR_HYPERCALL available.
-                        */
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = true;
-                       msr->write_val = HYPERV_LINUX_OS_ID;
-                       msr->fault_expected = false;
-                       break;
-               case 3:
-                       msr->idx = HV_X64_MSR_GUEST_OS_ID;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 4:
-                       msr->idx = HV_X64_MSR_HYPERCALL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-
-               case 5:
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 6:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 7:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_VP_RUNTIME;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 8:
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 9:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 10:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_TIME_REF_COUNT;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 11:
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 12:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 13:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_VP_INDEX;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 14:
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 15:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 16:
-                       msr->idx = HV_X64_MSR_RESET;
-                       msr->write = true;
-                       /*
-                        * TODO: the test only writes '0' to HV_X64_MSR_RESET
-                        * at the moment, writing some other value there will
-                        * trigger real vCPU reset and the code is not prepared
-                        * to handle it yet.
-                        */
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 17:
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 18:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 19:
-                       msr->idx = HV_X64_MSR_REFERENCE_TSC;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 20:
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 21:
-                       /*
-                        * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
-                        * capability enabled and guest visible CPUID bit unset.
-                        */
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 22:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 23:
-                       msr->idx = HV_X64_MSR_EOM;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 24:
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 25:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 26:
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-               case 27:
-                       /* Direct mode test */
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 1 << 12;
-                       msr->fault_expected = true;
-                       break;
-               case 28:
-                       vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
-                       msr->idx = HV_X64_MSR_STIMER0_CONFIG;
-                       msr->write = true;
-                       msr->write_val = 1 << 12;
-                       msr->fault_expected = false;
-                       break;
-
-               case 29:
-                       msr->idx = HV_X64_MSR_EOI;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 30:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
-                       msr->idx = HV_X64_MSR_EOI;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               case 31:
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 32:
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 33:
-                       /* Read only */
-                       msr->idx = HV_X64_MSR_TSC_FREQUENCY;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 34:
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 35:
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 36:
-                       msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-               case 37:
-                       /* Can only write '0' */
-                       msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = true;
-                       break;
-
-               case 38:
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 39:
-                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 40:
-                       msr->idx = HV_X64_MSR_CRASH_P0;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               case 41:
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 42:
-                       vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 43:
-                       msr->idx = HV_X64_MSR_SYNDBG_STATUS;
-                       msr->write = true;
-                       msr->write_val = 0;
-                       msr->fault_expected = false;
-                       break;
-
-               case 44:
-                       /* MSR is not available when CPUID feature bit is unset */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = true;
-                       break;
-               case 45:
-                       /* MSR is available when CPUID feature bit is set */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = false;
-                       msr->fault_expected = false;
-                       break;
-               case 46:
-                       /* Writing bits other than 0 is forbidden */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 0xdeadbeef;
-                       msr->fault_expected = true;
-                       break;
-               case 47:
-                       /* Setting bit 0 enables the feature */
-                       if (!has_invtsc)
-                               goto next_stage;
-                       msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
-                       msr->write = true;
-                       msr->write_val = 1;
-                       msr->fault_expected = false;
-                       break;
-
-               default:
-                       kvm_vm_free(vm);
-                       return;
-               }
-
-               vcpu_set_cpuid(vcpu);
-
-               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
-               pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
-                        msr->idx, msr->write ? "write" : "read");
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-                       return;
-               }
-
-next_stage:
-               stage++;
-               kvm_vm_free(vm);
-       }
-}
-
-static void guest_test_hcalls_access(void)
-{
-       struct kvm_cpuid2 *prev_cpuid = NULL;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage = 0;
-       vm_vaddr_t hcall_page, hcall_params;
-       struct hcall_data *hcall;
-
-       while (true) {
-               vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
-
-               /* Hypercall input/output */
-               hcall_page = vm_vaddr_alloc_pages(vm, 2);
-               memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-               hcall_params = vm_vaddr_alloc_page(vm);
-               memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-               hcall = addr_gva2hva(vm, hcall_params);
-
-               vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-               vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
-
-               if (!prev_cpuid) {
-                       vcpu_reset_hv_cpuid(vcpu);
-
-                       prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
-               } else {
-                       vcpu_init_cpuid(vcpu, prev_cpuid);
-               }
-
-               switch (stage) {
-               case 0:
-                       vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
-                       hcall->control = 0xbeef;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
-                       break;
-
-               case 1:
-                       hcall->control = HVCALL_POST_MESSAGE;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 2:
-                       vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
-                       hcall->control = HVCALL_POST_MESSAGE;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-
-               case 3:
-                       hcall->control = HVCALL_SIGNAL_EVENT;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 4:
-                       vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
-                       hcall->control = HVCALL_SIGNAL_EVENT;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-
-               case 5:
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
-                       break;
-               case 6:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 7:
-                       vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
-                       hcall->control = HVCALL_RESET_DEBUG_SESSION;
-                       hcall->expect = HV_STATUS_OPERATION_DENIED;
-                       break;
-
-               case 8:
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 9:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 10:
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 11:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-
-               case 12:
-                       hcall->control = HVCALL_SEND_IPI;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 13:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
-                       hcall->control = HVCALL_SEND_IPI;
-                       hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
-                       break;
-               case 14:
-                       /* Nothing in 'sparse banks' -> success */
-                       hcall->control = HVCALL_SEND_IPI_EX;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-
-               case 15:
-                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 16:
-                       vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
-                       hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 17:
-                       /* XMM fast hypercall */
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
-                       hcall->ud_expected = true;
-                       break;
-               case 18:
-                       vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
-                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
-                       hcall->ud_expected = false;
-                       hcall->expect = HV_STATUS_SUCCESS;
-                       break;
-               case 19:
-                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
-                       hcall->expect = HV_STATUS_ACCESS_DENIED;
-                       break;
-               case 20:
-                       vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
-                       hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
-                       hcall->expect = HV_STATUS_INVALID_PARAMETER;
-                       break;
-               case 21:
-                       kvm_vm_free(vm);
-                       return;
-               }
-
-               vcpu_set_cpuid(vcpu);
-
-               memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
-
-               pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
-                       return;
-               }
-
-               stage++;
-               kvm_vm_free(vm);
-       }
-}
-
-int main(void)
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
-
-       pr_info("Testing access to Hyper-V specific MSRs\n");
-       guest_test_msrs_access();
-
-       pr_info("Testing access to Hyper-V hypercalls\n");
-       guest_test_hcalls_access();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
deleted file mode 100644 (file)
index 22c0c12..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define RECEIVER_VCPU_ID_1 2
-#define RECEIVER_VCPU_ID_2 65
-
-#define IPI_VECTOR      0xfe
-
-static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
-
-struct hv_vpset {
-       u64 format;
-       u64 valid_bank_mask;
-       u64 bank_contents[2];
-};
-
-enum HV_GENERIC_SET_FORMAT {
-       HV_GENERIC_SET_SPARSE_4K,
-       HV_GENERIC_SET_ALL,
-};
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
-       u32 vector;
-       u32 reserved;
-       u64 cpu_mask;
-};
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
-       u32 vector;
-       u32 reserved;
-       struct hv_vpset vp_set;
-};
-
-static inline void hv_init(vm_vaddr_t pgs_gpa)
-{
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-}
-
-static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
-       u32 vcpu_id;
-
-       x2apic_enable();
-       hv_init(pgs_gpa);
-
-       vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
-       /* Signal sender vCPU we're ready */
-       ipis_rcvd[vcpu_id] = (u64)-1;
-
-       for (;;)
-               asm volatile("sti; hlt; cli");
-}
-
-static void guest_ipi_handler(struct ex_regs *regs)
-{
-       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-
-       ipis_rcvd[vcpu_id]++;
-       wrmsr(HV_X64_MSR_EOI, 1);
-}
-
-static inline void nop_loop(void)
-{
-       int i;
-
-       for (i = 0; i < 100000000; i++)
-               asm volatile("nop");
-}
-
-static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
-{
-       struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
-       struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
-       int stage = 1, ipis_expected[2] = {0};
-
-       hv_init(pgs_gpa);
-       GUEST_SYNC(stage++);
-
-       /* Wait for receiver vCPUs to come up */
-       while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
-               nop_loop();
-       ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
-
-       /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
-       ipi->vector = IPI_VECTOR;
-       ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
-       hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
-       hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
-                        IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 0;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 1;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-       ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
-       ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
-       ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
-                        (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                        IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
-       memset(hcall_page, 0, 4096);
-       ipi_ex->vector = IPI_VECTOR;
-       ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
-       hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-       /*
-        * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
-        */
-       ipi_ex->vp_set.valid_bank_mask = 0;
-       hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
-       hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
-                        IPI_VECTOR, HV_GENERIC_SET_ALL);
-       nop_loop();
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
-       GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
-       GUEST_SYNC(stage++);
-
-       GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
-       int old, r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       vcpu_run(vcpu);
-
-       TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu[3];
-       vm_vaddr_t hcall_page;
-       pthread_t threads[2];
-       int stage = 1, r;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
-
-       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
-       /* Hypercall input/output */
-       hcall_page = vm_vaddr_alloc_pages(vm, 2);
-       memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-
-       vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
-       vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
-       vcpu_set_hv_cpuid(vcpu[1]);
-
-       vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
-       vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
-       vcpu_set_hv_cpuid(vcpu[2]);
-
-       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
-       vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_hv_cpuid(vcpu[0]);
-
-       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
-       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
-       TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
-
-       while (true) {
-               vcpu_run(vcpu[0]);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu[0], &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)",
-                                   uc.args[1], stage);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               stage++;
-       }
-
-done:
-       cancel_join_vcpu_thread(threads[0], vcpu[1]);
-       cancel_join_vcpu_thread(threads[1], vcpu[2]);
-       kvm_vm_free(vm);
-
-       return r;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
deleted file mode 100644 (file)
index 0ddb632..0000000
+++ /dev/null
@@ -1,199 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Tests for Hyper-V extensions to SVM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <linux/bitmap.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "hyperv.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/* Exit to L1 from L2 with RDMSR instruction */
-static inline void rdmsr_from_l2(uint32_t msr)
-{
-       /* Currently, L1 doesn't preserve GPRs during vmexits. */
-       __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
-                             "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                             "r10", "r11", "r12", "r13", "r14", "r15");
-}
-
-void l2_guest_code(void)
-{
-       u64 unused;
-
-       GUEST_SYNC(3);
-       /* Exit to L1 */
-       vmmcall();
-
-       /* MSR-Bitmap tests */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
-       rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
-       vmmcall();
-       rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
-
-       GUEST_SYNC(5);
-
-       /* L2 TLB flush tests */
-       hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                        HV_HYPERCALL_FAST_BIT, 0x0,
-                        HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                        HV_FLUSH_ALL_PROCESSORS);
-       rdmsr_from_l2(MSR_FS_BASE);
-       /*
-        * Note: hypercall status (RAX) is not preserved correctly by L1 after
-        * synthetic vmexit, use unchecked version.
-        */
-       __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                          HV_HYPERCALL_FAST_BIT, 0x0,
-                          HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                          HV_FLUSH_ALL_PROCESSORS, &unused);
-
-       /* Done, exit to L1 and never come back.  */
-       vmmcall();
-}
-
-static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
-                                                   struct hyperv_test_pages *hv_pages,
-                                                   vm_vaddr_t pgs_gpa)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-       struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
-
-       GUEST_SYNC(1);
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
-       enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
-
-       GUEST_ASSERT(svm->vmcb_gpa);
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* L2 TLB flush setup */
-       hve->partition_assist_page = hv_pages->partition_assist_gpa;
-       hve->hv_enlightenments_control.nested_flush_hypercall = 1;
-       hve->hv_vm_id = 1;
-       hve->hv_vp_id = 1;
-       current_vp_assist->nested_control.features.directhypercall = 1;
-       *(u32 *)(hv_pages->partition_assist) = 0;
-
-       GUEST_SYNC(2);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(4);
-       vmcb->save.rip += 3;
-
-       /* Intercept RDMSR 0xc0000100 */
-       vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
-       __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-       /* Enable enlightened MSR bitmap */
-       hve->hv_enlightenments_control.msr_bitmap = 1;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-       /* Intercept RDMSR 0xc0000101 without telling KVM about it */
-       __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
-       /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
-       vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
-       run_guest(vmcb, svm->vmcb_gpa);
-       /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       vmcb->save.rip += 3; /* vmcall */
-
-       /* Now tell KVM we've changed MSR-Bitmap */
-       vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-
-
-       /*
-        * L2 TLB flush test. First VMCALL should be handled directly by L0,
-        * no VMCALL exit expected.
-        */
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
-       vmcb->save.rip += 2; /* rdmsr */
-       /* Enable synthetic vmexit */
-       *(u32 *)(hv_pages->partition_assist) = 1;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
-       GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(6);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
-       vm_vaddr_t hcall_page;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int stage;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-       TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_set_hv_cpuid(vcpu);
-       vcpu_alloc_svm(vm, &nested_gva);
-       vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-
-       hcall_page = vm_vaddr_alloc_pages(vm, 1);
-       memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
-
-       vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
-       vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
deleted file mode 100644 (file)
index 077cd0e..0000000
+++ /dev/null
@@ -1,680 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- */
-#include <asm/barrier.h>
-#include <pthread.h>
-#include <inttypes.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-#include "test_util.h"
-#include "vmx.h"
-
-#define WORKER_VCPU_ID_1 2
-#define WORKER_VCPU_ID_2 65
-
-#define NTRY 100
-#define NTEST_PAGES 2
-
-struct hv_vpset {
-       u64 format;
-       u64 valid_bank_mask;
-       u64 bank_contents[];
-};
-
-enum HV_GENERIC_SET_FORMAT {
-       HV_GENERIC_SET_SPARSE_4K,
-       HV_GENERIC_SET_ALL,
-};
-
-#define HV_FLUSH_ALL_PROCESSORS                        BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES    BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY      BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT     BIT(3)
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
-       u64 address_space;
-       u64 flags;
-       u64 processor_mask;
-       u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
-       u64 address_space;
-       u64 flags;
-       struct hv_vpset hv_vp_set;
-       u64 gva_list[];
-} __packed;
-
-/*
- * Pass the following info to 'workers' and 'sender'
- * - Hypercall page's GVA
- * - Hypercall page's GPA
- * - Test pages GVA
- * - GVAs of the test pages' PTEs
- */
-struct test_data {
-       vm_vaddr_t hcall_gva;
-       vm_paddr_t hcall_gpa;
-       vm_vaddr_t test_pages;
-       vm_vaddr_t test_pages_pte[NTEST_PAGES];
-};
-
-/* 'Worker' vCPU code checking the contents of the test page */
-static void worker_guest_code(vm_vaddr_t test_data)
-{
-       struct test_data *data = (struct test_data *)test_data;
-       u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
-       void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
-       u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
-       u64 expected, val;
-
-       x2apic_enable();
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-
-       for (;;) {
-               cpu_relax();
-
-               expected = READ_ONCE(*this_cpu);
-
-               /*
-                * Make sure the value in the test page is read after reading
-                * the expectation for the first time. Pairs with wmb() in
-                * prepare_to_test().
-                */
-               rmb();
-
-               val = READ_ONCE(*(u64 *)data->test_pages);
-
-               /*
-                * Make sure the value in the test page is read before reading
-                * the expectation for the second time. Pairs with wmb() in
-                * post_test().
-                */
-               rmb();
-
-               /*
-                * '0' indicates the sender is between iterations, wait until
-                * the sender is ready for this vCPU to start checking again.
-                */
-               if (!expected)
-                       continue;
-
-               /*
-                * Re-read the per-vCPU byte to ensure the sender didn't move
-                * onto a new iteration.
-                */
-               if (expected != READ_ONCE(*this_cpu))
-                       continue;
-
-               GUEST_ASSERT(val == expected);
-       }
-}
-
-/*
- * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
- * test page. '0' means don't check.
- */
-static void set_expected_val(void *addr, u64 val, int vcpu_id)
-{
-       void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
-
-       *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
-}
-
-/*
- * Update PTEs swapping two test pages.
- * TODO: use swap()/xchg() when these are provided.
- */
-static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
-{
-       uint64_t tmp = *(uint64_t *)pte_gva1;
-
-       *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
-       *(uint64_t *)pte_gva2 = tmp;
-}
-
-/*
- * TODO: replace the silly NOP loop with a proper udelay() implementation.
- */
-static inline void do_delay(void)
-{
-       int i;
-
-       for (i = 0; i < 1000000; i++)
-               asm volatile("nop");
-}
-
-/*
- * Prepare to test: 'disable' workers by setting the expectation to '0',
- * clear hypercall input page and then swap two test pages.
- */
-static inline void prepare_to_test(struct test_data *data)
-{
-       /* Clear hypercall input page */
-       memset((void *)data->hcall_gva, 0, PAGE_SIZE);
-
-       /* 'Disable' workers */
-       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
-       set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
-
-       /* Make sure workers are 'disabled' before we swap PTEs. */
-       wmb();
-
-       /* Make sure workers have enough time to notice */
-       do_delay();
-
-       /* Swap test page mappings */
-       swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
-}
-
-/*
- * Finalize the test: check the hypercall result, set the expected val for
- * 'worker' CPUs and give them some time to test.
- */
-static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
-{
-       /* Make sure we change the expectation after swapping PTEs */
-       wmb();
-
-       /* Set the expectation for workers, '0' means don't test */
-       set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
-       set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
-
-       /* Make sure workers have enough time to test */
-       do_delay();
-}
-
-#define TESTVAL1 0x0101010101010101
-#define TESTVAL2 0x0202020202020202
-
-/* Main vCPU doing the test */
-static void sender_guest_code(vm_vaddr_t test_data)
-{
-       struct test_data *data = (struct test_data *)test_data;
-       struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
-       struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
-       vm_paddr_t hcall_gpa = data->hcall_gpa;
-       int i, stage = 1;
-
-       wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
-       wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
-
-       /* "Slow" hypercalls */
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
-                                hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                       HV_FLUSH_ALL_PROCESSORS;
-               flush->processor_mask = 0;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
-                                hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                       HV_FLUSH_ALL_PROCESSORS;
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [1] */
-               flush_ex->gva_list[1] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [2] */
-               flush_ex->gva_list[2] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
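-               /*
-                * HV_GENERIC_SET_ALL targets all vCPUs, so no sparse banks
-                * (and thus no VARHEAD bits) are needed.
-                */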
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               flush_ex->gva_list[0] = (u64)data->test_pages;
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                hcall_gpa, hcall_gpa + PAGE_SIZE);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       /* "Fast" hypercalls */
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
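-               /*
-                * Fast hypercalls pass input via registers rather than guest
-                * memory, so stage the payload in XMM.
-                */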
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                                HV_HYPERCALL_FAST_BIT, 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->processor_mask = BIT(WORKER_VCPU_ID_1);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
-                                HV_HYPERCALL_FAST_BIT, 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                                HV_FLUSH_ALL_PROCESSORS);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush->processor_mask, 1);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
-                                HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
-                                HV_FLUSH_ALL_PROCESSORS);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [1] */
-               flush_ex->gva_list[1] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_1 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-               flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
-                       BIT_ULL(WORKER_VCPU_ID_2 / 64);
-               flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
-               flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
-               /* bank_contents and gva_list occupy the same space, thus [2] */
-               flush_ex->gva_list[2] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
-                                HV_HYPERCALL_FAST_BIT,
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_SYNC(stage++);
-
-       /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
-       for (i = 0; i < NTRY; i++) {
-               prepare_to_test(data);
-               flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
-               flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
-               flush_ex->gva_list[0] = (u64)data->test_pages;
-               hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
-               hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
-                                HV_HYPERCALL_FAST_BIT |
-                                (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
-                                0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
-               post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
-                         i % 2 ? TESTVAL1 : TESTVAL2);
-       }
-
-       GUEST_DONE();
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               /* NOT REACHED */
-       default:
-               TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
-       }
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu[3];
-       pthread_t threads[2];
-       vm_vaddr_t test_data_page, gva;
-       vm_paddr_t gpa;
-       uint64_t *pte;
-       struct test_data *data;
-       struct ucall uc;
-       int stage = 1, r, i;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
-
-       vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
-
-       /* Test data page */
-       test_data_page = vm_vaddr_alloc_page(vm);
-       data = (struct test_data *)addr_gva2hva(vm, test_data_page);
-
-       /* Hypercall input/output */
-       data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
-       data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
-       memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
-
-       /*
-        * Test pages: the first one is filled with '0x01's, the second with '0x02's
-        * and the test will swap their mappings. The third page keeps the indication
-        * about the current state of mappings.
-        */
-       data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
-       for (i = 0; i < NTEST_PAGES; i++)
-               memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
-                      (u8)(i + 1), PAGE_SIZE);
-       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
-       set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
-
-       /*
-        * Get PTE pointers for test pages and map them inside the guest.
-        * Use separate page for each PTE for simplicity.
-        */
-       gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
-       for (i = 0; i < NTEST_PAGES; i++) {
-               pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
-               gpa = addr_hva2gpa(vm, pte);
-               __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
-               data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
-       }
-
-       /*
-        * Sender vCPU which performs the test: swaps test pages, sets expectation
-        * for 'workers' and issues TLB flush hypercalls.
-        */
-       vcpu_args_set(vcpu[0], 1, test_data_page);
-       vcpu_set_hv_cpuid(vcpu[0]);
-
-       /* Create worker vCPUs which check the contents of the test pages */
-       vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
-       vcpu_args_set(vcpu[1], 1, test_data_page);
-       vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
-       vcpu_set_hv_cpuid(vcpu[1]);
-
-       vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
-       vcpu_args_set(vcpu[2], 1, test_data_page);
-       vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
-       vcpu_set_hv_cpuid(vcpu[2]);
-
-       r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
-       TEST_ASSERT(!r, "pthread_create() failed");
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
-       TEST_ASSERT(!r, "pthread_create() failed");
-
-       while (true) {
-               vcpu_run(vcpu[0]);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu[0], &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)",
-                                   uc.args[1], stage);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               stage++;
-       }
-
-done:
-       cancel_join_vcpu_thread(threads[0], vcpu[1]);
-       cancel_join_vcpu_thread(threads[1], vcpu[2]);
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
deleted file mode 100644 (file)
index 5bc1222..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2021, Google LLC.
- *
- * Tests for adjusting the KVM clock from userspace
- */
-#include <asm/kvm_para.h>
-#include <asm/pvclock.h>
-#include <asm/pvclock-abi.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <time.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct test_case {
-       uint64_t kvmclock_base;
-       int64_t realtime_offset;
-};
-
-static struct test_case test_cases[] = {
-       { .kvmclock_base = 0 },
-       { .kvmclock_base = 180 * NSEC_PER_SEC },
-       { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
-       { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
-};
-
-#define GUEST_SYNC_CLOCK(__stage, __val)                       \
-               GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
-
-static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
-{
-       int i;
-
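-       /*
-        * Point kvmclock at the guest physical address of the pvclock
-        * structure; bit 0 (KVM_MSR_ENABLED) turns the clock on.
-        */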
-       wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
-       for (i = 0; i < ARRAY_SIZE(test_cases); i++)
-               GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
-}
-
-#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
-
-static inline void assert_flags(struct kvm_clock_data *data)
-{
-       TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
-                   "unexpected clock data flags: %x (want set: %x)",
-                   data->flags, EXPECTED_FLAGS);
-}
-
-static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
-                       struct kvm_clock_data *end)
-{
-       uint64_t obs, exp_lo, exp_hi;
-
-       obs = uc->args[2];
-       exp_lo = start->clock;
-       exp_hi = end->clock;
-
-       assert_flags(start);
-       assert_flags(end);
-
-       TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
-                   "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
-                   obs, exp_lo, exp_hi);
-
-       pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
-               obs, exp_lo, exp_hi);
-}
-
-static void handle_abort(struct ucall *uc)
-{
-       REPORT_GUEST_ASSERT(*uc);
-}
-
-static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
-{
-       struct kvm_clock_data data;
-
-       memset(&data, 0, sizeof(data));
-
-       data.clock = test_case->kvmclock_base;
-       if (test_case->realtime_offset) {
-               struct timespec ts;
-               int r;
-
-               data.flags |= KVM_CLOCK_REALTIME;
-               do {
-                       r = clock_gettime(CLOCK_REALTIME, &ts);
-                       if (!r)
-                               break;
-               } while (errno == EINTR);
-
-               TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
-
-               data.realtime = ts.tv_sec * NSEC_PER_SEC;
-               data.realtime += ts.tv_nsec;
-               data.realtime += test_case->realtime_offset;
-       }
-
-       vm_ioctl(vm, KVM_SET_CLOCK, &data);
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct kvm_clock_data start, end;
-       struct kvm_vm *vm = vcpu->vm;
-       struct ucall uc;
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
-               setup_clock(vm, &test_cases[i]);
-
-               vm_ioctl(vm, KVM_GET_CLOCK, &start);
-
-               vcpu_run(vcpu);
-               vm_ioctl(vm, KVM_GET_CLOCK, &end);
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       handle_sync(&uc, &start, &end);
-                       break;
-               case UCALL_ABORT:
-                       handle_abort(&uc);
-                       return;
-               default:
-                       TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
-               }
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t pvti_gva;
-       vm_paddr_t pvti_gpa;
-       struct kvm_vm *vm;
-       int flags;
-
-       flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
-       TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
-
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
-       pvti_gpa = addr_gva2gpa(vm, pvti_gva);
-       vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
-
-       enter_guest(vcpu);
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
deleted file mode 100644 (file)
index 78878b3..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for KVM paravirtual feature disablement
- */
-#include <asm/kvm_para.h>
-#include <linux/kvm_para.h>
-#include <stdint.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-struct msr_data {
-       uint32_t idx;
-       const char *name;
-};
-
-#define TEST_MSR(msr) { .idx = msr, .name = #msr }
-#define UCALL_PR_MSR 0xdeadbeef
-#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
-
-/*
- * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
- * written, as the KVM_CPUID_FEATURES leaf is cleared.
- */
-static struct msr_data msrs_to_test[] = {
-       TEST_MSR(MSR_KVM_SYSTEM_TIME),
-       TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
-       TEST_MSR(MSR_KVM_WALL_CLOCK),
-       TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
-       TEST_MSR(MSR_KVM_ASYNC_PF_EN),
-       TEST_MSR(MSR_KVM_STEAL_TIME),
-       TEST_MSR(MSR_KVM_PV_EOI_EN),
-       TEST_MSR(MSR_KVM_POLL_CONTROL),
-       TEST_MSR(MSR_KVM_ASYNC_PF_INT),
-       TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
-};
-
-static void test_msr(struct msr_data *msr)
-{
-       uint64_t ignored;
-       uint8_t vector;
-
-       PR_MSR(msr);
-
-       vector = rdmsr_safe(msr->idx, &ignored);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
-       vector = wrmsr_safe(msr->idx, 0);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-}
-
-struct hcall_data {
-       uint64_t nr;
-       const char *name;
-};
-
-#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
-#define UCALL_PR_HCALL 0xdeadc0de
-#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
-
-/*
- * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
- * features have been cleared in KVM_CPUID_FEATURES.
- */
-static struct hcall_data hcalls_to_test[] = {
-       TEST_HCALL(KVM_HC_KICK_CPU),
-       TEST_HCALL(KVM_HC_SEND_IPI),
-       TEST_HCALL(KVM_HC_SCHED_YIELD),
-};
-
-static void test_hcall(struct hcall_data *hc)
-{
-       uint64_t r;
-
-       PR_HCALL(hc);
-       r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
-       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
-}
-
-static void guest_main(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
-               test_msr(&msrs_to_test[i]);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
-               test_hcall(&hcalls_to_test[i]);
-       }
-
-       GUEST_DONE();
-}
-
-static void pr_msr(struct ucall *uc)
-{
-       struct msr_data *msr = (struct msr_data *)uc->args[0];
-
-       pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
-}
-
-static void pr_hcall(struct ucall *uc)
-{
-       struct hcall_data *hc = (struct hcall_data *)uc->args[0];
-
-       pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
-}
-
-static void enter_guest(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       while (true) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_PR_MSR:
-                       pr_msr(&uc);
-                       break;
-               case UCALL_PR_HCALL:
-                       pr_hcall(&uc);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       return;
-               case UCALL_DONE:
-                       return;
-               }
-       }
-}
-
-static void test_pv_unhalt(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_cpuid_entry2 *ent;
-       u32 kvm_sig_old;
-
-       pr_info("testing KVM_FEATURE_PV_UNHALT\n");
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_DISABLE_EXITS));
-
-       /* KVM_PV_UNHALT test */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
-
-       TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-                   "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
-
-       /* Make sure KVM clears vcpu->arch.kvm_cpuid */
-       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
-       kvm_sig_old = ent->ebx;
-       ent->ebx = 0xdeadbeef;
-       vcpu_set_cpuid(vcpu);
-
-       vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
-       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
-       ent->ebx = kvm_sig_old;
-       vcpu_set_cpuid(vcpu);
-
-       TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-                   "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
-
-       /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
-
-       kvm_vm_free(vm);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-
-       vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
-
-       vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
-
-       enter_guest(vcpu);
-       kvm_vm_free(vm);
-
-       test_pv_unhalt();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
deleted file mode 100644 (file)
index 7e2bfb3..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * maximum APIC ID capability tests
- *
- * Copyright (C) 2022, Intel, Inc.
- *
- * Tests for getting/setting maximum APIC ID capability
- */
-
-#include "kvm_util.h"
-
-#define MAX_VCPU_ID    2
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones();
-
-       /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
-       ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
-
-       /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
-       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
-       TEST_ASSERT(ret < 0,
-                   "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
-
-       /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
-       if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
-               vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
-
-               /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
-               ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
-               TEST_ASSERT(ret < 0,
-                           "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
-       }
-
-       /* Set KVM_CAP_MAX_VCPU_ID */
-       vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
-
-       /* Try to set KVM_CAP_MAX_VCPU_ID again */
-       ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
-       TEST_ASSERT(ret < 0,
-                   "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
-
-       /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
-       TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
-
-       /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
-       TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
-
-       /* Create vCPU with id within bounds */
-       ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
-       TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
-
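-       /* On success, KVM_CREATE_VCPU returns a new vCPU fd; close it. */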
-       close(ret);
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
deleted file mode 100644 (file)
index 2b550ef..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define CPUID_MWAIT (1u << 3)
-
-enum monitor_mwait_testcases {
-       MWAIT_QUIRK_DISABLED = BIT(0),
-       MISC_ENABLES_QUIRK_DISABLED = BIT(1),
-       MWAIT_DISABLED = BIT(2),
-};
-
-/*
- * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
- * other scenarios KVM should emulate them as nops.
- */
-#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
-do {                                                                   \
-       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
-                           ((testcase) & MWAIT_DISABLED);              \
-                                                                       \
-       if (fault_wanted)                                               \
-               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
-                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
-                              testcase, vector);                       \
-       else                                                            \
-               __GUEST_ASSERT(!(vector),                               \
-                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
-                              testcase, vector);                       \
-} while (0)
-
-static void guest_monitor_wait(int testcase)
-{
-       u8 vector;
-
-       GUEST_SYNC(testcase);
-
-       /*
-        * Arbitrarily MONITOR this function, SVM performs fault checks before
-        * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
-        */
-       vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
-
-       vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
-       guest_monitor_wait(MWAIT_DISABLED);
-
-       guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-       guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       uint64_t disabled_quirks;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       int testcase;
-
-       TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       testcase = uc.args[1];
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       goto done;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-                       goto done;
-               }
-
-               disabled_quirks = 0;
-               if (testcase & MWAIT_QUIRK_DISABLED)
-                       disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
-               if (testcase & MISC_ENABLES_QUIRK_DISABLED)
-                       disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
-               vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
-               /*
-                * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
-                * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
-                * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
-                * the only valid configuration is MWAIT disabled, as CPUID
-                * can't be manually changed after running the vCPU.
-                */
-               if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
-                       TEST_ASSERT(testcase & MWAIT_DISABLED,
-                                   "Can't toggle CPUID features after running vCPU");
-                       continue;
-               }
-
-               vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
-                            (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
deleted file mode 100644 (file)
index 3eb0313..0000000
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-/*
- * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
- * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
- */
-#define FAKE_TRIPLE_FAULT_VECTOR       0xaa
-
-/* Arbitrary 32-bit error code injected by this test. */
-#define SS_ERROR_CODE 0xdeadbeef
-
-/*
- * Bit '0' is set on Intel if the exception occurs while delivering a previous
- * event/exception.  AMD's wording is ambiguous, but presumably the bit is set
- * if the exception occurs while delivering an external event, e.g. NMI or INTR,
- * but not for exceptions that occur when delivering other exceptions or
- * software interrupts.
- *
- * Note, Intel's name for it, "External event", is misleading and much more
- * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
- */
-#define ERROR_CODE_EXT_FLAG    BIT(0)
-
-/*
- * Bit '1' is set if the fault occurred when looking up a descriptor in the
- * IDT, which is the case here as the IDT is empty/NULL.
- */
-#define ERROR_CODE_IDT_FLAG    BIT(1)
-
-/*
- * The #GP that occurs when vectoring #SS should show the index into the IDT
- * for #SS, plus have the "IDT flag" set.
- */
-#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
-#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
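-/* With SS_VECTOR == 12, the values above are 0x62 (AMD) and 0x63 (Intel). */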
-
-/*
- * Intel and AMD both shove '0' into the error code on #DF, regardless of what
- * led to the double fault.
- */
-#define DF_ERROR_CODE 0
-
-#define INTERCEPT_SS           (BIT_ULL(SS_VECTOR))
-#define INTERCEPT_SS_DF                (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
-#define INTERCEPT_SS_GP_DF     (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
-
-static void l2_ss_pending_test(void)
-{
-       GUEST_SYNC(SS_VECTOR);
-}
-
-static void l2_ss_injected_gp_test(void)
-{
-       GUEST_SYNC(GP_VECTOR);
-}
-
-static void l2_ss_injected_df_test(void)
-{
-       GUEST_SYNC(DF_VECTOR);
-}
-
-static void l2_ss_injected_tf_test(void)
-{
-       GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
-}
-
-static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
-                      uint32_t error_code)
-{
-       struct vmcb *vmcb = svm->vmcb;
-       struct vmcb_control_area *ctrl = &vmcb->control;
-
-       vmcb->save.rip = (u64)l2_code;
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
-               return;
-
-       GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
-       GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
-}
-
-static void l1_svm_code(struct svm_test_data *svm)
-{
-       struct vmcb_control_area *ctrl = &svm->vmcb->control;
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       svm->vmcb->save.idtr.limit = 0;
-       ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
-       svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
-       svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS_DF;
-       svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
-       ctrl->intercept_exceptions = INTERCEPT_SS;
-       svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
-       GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
-
-       GUEST_DONE();
-}
-
-static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
-{
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
-
-       GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
-
-       if (vector == FAKE_TRIPLE_FAULT_VECTOR)
-               return;
-
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
-       GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
-}
-
-static void l1_vmx_code(struct vmx_pages *vmx)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
-
-       GUEST_ASSERT_EQ(load_vmcs(vmx), true);
-
-       prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
-
-       /*
-        * VMX disallows injecting an exception with error_code[31:16] != 0,
-        * and hardware will never generate a VM-Exit with bits 31:16 set.
-        * KVM should likewise truncate the "bad" userspace value.
-        */
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
-       vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
-       vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
-
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
-       vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
-
-       GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
-       vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
-       GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
-
-       GUEST_DONE();
-}
-
-static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
-{
-       if (this_cpu_has(X86_FEATURE_SVM))
-               l1_svm_code(test_data);
-       else
-               l1_vmx_code(test_data);
-}
-
-static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
-{
-       struct ucall uc;
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               TEST_ASSERT(vector == uc.args[1],
-                           "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
-               break;
-       case UCALL_DONE:
-               TEST_ASSERT(vector == -1,
-                           "Expected L2 to ask for %d, L2 says it's done", vector);
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       default:
-               TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
-       }
-}
-
-static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       TEST_ASSERT(!events.exception.pending,
-                   "Vector %d unexpectedly pending", events.exception.nr);
-       TEST_ASSERT(!events.exception.injected,
-                   "Vector %d unexpectedly injected", events.exception.nr);
-
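-       /* Queue #SS as still pending (!inject) or as already injected. */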
-       events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
-       events.exception.pending = !inject;
-       events.exception.injected = inject;
-       events.exception.nr = SS_VECTOR;
-       events.exception.has_error_code = true;
-       events.exception.error_code = SS_ERROR_CODE;
-       vcpu_events_set(vcpu, &events);
-}
-
-/*
- * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
- * when an exception is being queued for L2.  Specifically, verify that KVM
- * honors L1 exception intercept controls when a #SS is pending/injected,
- * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
- * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
- */
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_test_data_gva;
-       struct kvm_vcpu_events events;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
-
-       if (kvm_cpu_has(X86_FEATURE_SVM))
-               vcpu_alloc_svm(vm, &nested_test_data_gva);
-       else
-               vcpu_alloc_vmx(vm, &nested_test_data_gva);
-
-       vcpu_args_set(vcpu, 1, nested_test_data_gva);
-
-       /* Run L1 => L2.  L2 should sync and request #SS. */
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, SS_VECTOR);
-
-       /* Pend #SS and request immediate exit.  #SS should still be pending. */
-       queue_ss_exception(vcpu, false);
-       vcpu->run->immediate_exit = true;
-       vcpu_run_complete_io(vcpu);
-
-       /* Verify the pending event comes back out the same as it went in. */
-       vcpu_events_get(vcpu, &events);
-       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
-                       KVM_VCPUEVENT_VALID_PAYLOAD);
-       TEST_ASSERT_EQ(events.exception.pending, true);
-       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
-       TEST_ASSERT_EQ(events.exception.has_error_code, true);
-       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
-
-       /*
-        * Run for real with the pending #SS, L1 should get a VM-Exit due to
-        * #SS interception and re-enter L2 to request #GP (via injected #SS).
-        */
-       vcpu->run->immediate_exit = false;
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, GP_VECTOR);
-
-       /*
-        * Inject #SS, the #SS should bypass interception and cause #GP, which
-        * L1 should intercept before KVM morphs it to #DF.  L1 should then
-        * disable #GP interception and run L2 to request #DF (via #SS => #GP).
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, DF_VECTOR);
-
-       /*
-        * Inject #SS, the #SS should bypass interception and cause #GP, which
-        * L1 is no longer intercepting, and so should see a #DF VM-Exit.  L1
-        * should then signal that it is done.
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
-
-       /*
-        * Inject #SS yet again.  L1 is not intercepting #GP or #DF, and so
-        * should see nested TRIPLE_FAULT / SHUTDOWN.
-        */
-       queue_ss_exception(vcpu, true);
-       vcpu_run(vcpu);
-       assert_ucall_vector(vcpu, -1);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
deleted file mode 100644 (file)
index e7efb2b..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
- * environment setup and teardown
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdint.h>
-#include <time.h>
-
-#include <test_util.h>
-#include "kvm_util.h"
-#include "processor.h"
-
-#define HPAGE_SLOT             10
-#define HPAGE_GPA              (4UL << 30) /* 4G prevents collision w/ slot 0 */
-#define HPAGE_GVA              HPAGE_GPA /* GVA is arbitrary, so use GPA. */
-#define PAGES_PER_2MB_HUGE_PAGE 512
-#define HPAGE_SLOT_NPAGES      (3 * PAGES_PER_2MB_HUGE_PAGE)
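-/* i.e. enough 4KiB pages to back the slot with three 2MiB huge pages. */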
-
-/*
- * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
- * being run without it.
- */
-#define MAGIC_TOKEN 887563923
-
-/*
- * x86 opcode for the return instruction. Used to call into, and then
- * immediately return from, memory backed with hugepages.
- */
-#define RETURN_OPCODE 0xC3
-
-/* Call the specified memory address. */
-static void guest_do_CALL(uint64_t target)
-{
-       ((void (*)(void)) target)();
-}
-
-/*
- * Exit the VM after each memory access so that the userspace component of the
- * test can make assertions about the pages backing the VM.
- *
- * See below for an explanation of how each access should affect the
- * backing mappings.
- */
-void guest_code(void)
-{
-       uint64_t hpage_1 = HPAGE_GVA;
-       uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
-       uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
-
-       READ_ONCE(*(uint64_t *)hpage_1);
-       GUEST_SYNC(1);
-
-       READ_ONCE(*(uint64_t *)hpage_2);
-       GUEST_SYNC(2);
-
-       guest_do_CALL(hpage_1);
-       GUEST_SYNC(3);
-
-       guest_do_CALL(hpage_3);
-       GUEST_SYNC(4);
-
-       READ_ONCE(*(uint64_t *)hpage_1);
-       GUEST_SYNC(5);
-
-       READ_ONCE(*(uint64_t *)hpage_3);
-       GUEST_SYNC(6);
-}
-
-static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
-{
-       int actual_pages_2m;
-
-       actual_pages_2m = vm_get_stat(vm, "pages_2m");
-
-       TEST_ASSERT(actual_pages_2m == expected_pages_2m,
-                   "Unexpected 2m page count. Expected %d, got %d",
-                   expected_pages_2m, actual_pages_2m);
-}
-
-static void check_split_count(struct kvm_vm *vm, int expected_splits)
-{
-       int actual_splits;
-
-       actual_splits = vm_get_stat(vm, "nx_lpage_splits");
-
-       TEST_ASSERT(actual_splits == expected_splits,
-                   "Unexpected NX huge page split count. Expected %d, got %d",
-                   expected_splits, actual_splits);
-}
-
-static void wait_for_reclaim(int reclaim_period_ms)
-{
-       long reclaim_wait_ms;
-       struct timespec ts;
-
-       reclaim_wait_ms = reclaim_period_ms * 5;
-       ts.tv_sec = reclaim_wait_ms / 1000;
-       ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
-       nanosleep(&ts, NULL);
-}
-
-void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
-             bool reboot_permissions)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t nr_bytes;
-       void *hva;
-       int r;
-
-       vm = vm_create(1);
-
-       if (disable_nx_huge_pages) {
-               r = __vm_disable_nx_huge_pages(vm);
-               if (reboot_permissions) {
-                       TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
-               } else {
-                       TEST_ASSERT(r == -1 && errno == EPERM,
-                                   "This process should not have permission to disable NX huge pages");
-                       return;
-               }
-       }
-
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
-                                   HPAGE_GPA, HPAGE_SLOT,
-                                   HPAGE_SLOT_NPAGES, 0);
-
-       nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
-
-       /*
-        * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
-        * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
-        * whenever KVM is shadowing the guest page tables).
-        *
-        * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
-        * pages irrespective of the guest page size, so map with 4KiB pages
-        * to test that that is the case.
-        */
-       if (kvm_is_tdp_enabled())
-               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
-       else
-               virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
-
-       hva = addr_gpa2hva(vm, HPAGE_GPA);
-       memset(hva, RETURN_OPCODE, nr_bytes);
-
-       check_2m_page_count(vm, 0);
-       check_split_count(vm, 0);
-
-       /*
-        * The guest code will first read from the first hugepage, resulting
-        * in a huge page mapping being created.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, 1);
-       check_split_count(vm, 0);
-
-       /*
-        * Then the guest code will read from the second hugepage, resulting
-        * in another huge page mapping being created.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, 2);
-       check_split_count(vm, 0);
-
-       /*
-        * Next, the guest will execute from the first huge page, causing it
-        * to be remapped at 4k.
-        *
-        * If NX huge pages are disabled, this should have no effect.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
-
-       /*
-        * Executing from the third huge page (previously unaccessed) will
-        * cause part of it to be mapped at 4k.
-        *
-        * If NX huge pages are disabled, it should be mapped at 2M.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
-       /* Reading from the first huge page again should have no effect. */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
-
-       /* Give recovery thread time to run. */
-       wait_for_reclaim(reclaim_period_ms);
-
-       /*
-        * Now that the reclaimer has run, all the split pages should be gone.
-        *
-        * If NX huge pages are disabled, the reclaimer will not run, so
-        * nothing should change from here on.
-        */
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
-       check_split_count(vm, 0);
-
-       /*
-        * The 4k mapping on hpage 3 should have been removed, so check that
-        * reading from it causes a huge page mapping to be installed.
-        */
-       vcpu_run(vcpu);
-       check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
-       check_split_count(vm, 0);
-
-       kvm_vm_free(vm);
-}
-
-static void help(char *name)
-{
-       puts("");
-       printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
-       puts("");
-       printf(" -p: The NX reclaim period in milliseconds.\n");
-       printf(" -t: The magic token to indicate environment setup is done.\n");
-       printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
-       puts("");
-       exit(0);
-}
-
-int main(int argc, char **argv)
-{
-       int reclaim_period_ms = 0, token = 0, opt;
-       bool reboot_permissions = false;
-
-       while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
-               switch (opt) {
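-               /*
-                * KVM_CLOCK_REALTIME pairs the clock value with a host
-                * realtime reading; the injected offset mimics realtime
-                * moving between that reading and KVM_SET_CLOCK.
-                */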
-               case 'p':
-                       reclaim_period_ms = atoi_positive("Reclaim period", optarg);
-                       break;
-               case 't':
-                       token = atoi_paranoid(optarg);
-                       break;
-               case 'r':
-                       reboot_permissions = true;
-                       break;
-               case 'h':
-               default:
-                       help(argv[0]);
-                       break;
-               }
-       }
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
-
-       __TEST_REQUIRE(token == MAGIC_TOKEN,
-                      "This test must be run with the magic token via '-t %d'.\n"
-                      "Running via nx_huge_pages_test.sh, which also handles "
-                      "environment setup, is strongly recommended.", MAGIC_TOKEN);
-
-       run_test(reclaim_period_ms, false, reboot_permissions);
-       run_test(reclaim_period_ms, true, reboot_permissions);
-
-       return 0;
-}
-
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
deleted file mode 100755 (executable)
index caad084..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
-# Makes use of root privileges to set up huge pages and KVM module parameters.
-#
-# Copyright (C) 2022, Google LLC.
-
-set -e
-
-NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
-NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
-NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
-HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
-
-# If we're already root, the host might not have sudo.
-if [ $(whoami) == "root" ]; then
-       function do_sudo () {
-               "$@"
-       }
-else
-       function do_sudo () {
-               sudo "$@"
-       }
-fi
-
-set +e
-
-function sudo_echo () {
-       echo "$1" | do_sudo tee -a "$2" > /dev/null
-}
-
-NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
-
-sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
-
-(
-       set -e
-
-       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
-       sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
-       sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
-       sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
-       # Test with reboot permissions
-       if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
-               echo Running test with CAP_SYS_BOOT enabled
-               $NXECUTABLE -t 887563923 -p 100 -r
-               test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
-       else
-               echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
-       fi
-
-       # Test without reboot permissions
-       if [ $(whoami) != "root" ] ; then
-               echo Running test with CAP_SYS_BOOT disabled
-               $NXECUTABLE -t 887563923 -p 100
-       else
-               echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
-       fi
-)
-RET=$?
-
-sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
-sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
-sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
-
-exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
deleted file mode 100644 (file)
index 9cbf283..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies expected behavior of controlling guest access to
- * MSR_PLATFORM_INFO.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
-
-static void guest_code(void)
-{
-       uint64_t msr_platform_info;
-       uint8_t vector;
-
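-       /* The GUEST_SYNC arg tells the host whether to allow the next read. */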
-       GUEST_SYNC(true);
-       msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
-       GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
-                       MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
-       GUEST_SYNC(false);
-       vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
-       GUEST_ASSERT_EQ(vector, GP_VECTOR);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t msr_platform_info;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
-       vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
-                    msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-
-       for (;;) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected ucall %lu", uc.cmd);
-                       break;
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
deleted file mode 100644 (file)
index 698cb36..0000000
+++ /dev/null
@@ -1,644 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2023, Tencent, Inc.
- */
-#include <x86intrin.h>
-
-#include "pmu.h"
-#include "processor.h"
-
-/* Number of iterations of the loop for the guest measurement payload. */
-#define NUM_LOOPS                      10
-
-/* Each iteration of the loop retires one branch instruction. */
-#define NUM_BRANCH_INSNS_RETIRED       (NUM_LOOPS)
-
-/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
- */
-#define NUM_INSNS_PER_LOOP             3
-
-/*
- * Number of "extra" instructions that will be counted, i.e. the number of
- * instructions that are needed to set up the loop and then disable the
- * counter.  2 MOV, 2 XOR, 1 WRMSR.
- */
-#define NUM_EXTRA_INSNS                        5
-
-/* Total number of instructions retired within the measured section. */
-#define NUM_INSNS_RETIRED              (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
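-/* With the values above, NUM_INSNS_RETIRED = 10 * 3 + 5 = 35. */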
-
-
-static uint8_t kvm_pmu_version;
-static bool kvm_has_perf_caps;
-
-static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
-                                                 void *guest_code,
-                                                 uint8_t pmu_version,
-                                                 uint64_t perf_capabilities)
-{
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(vcpu, guest_code);
-       sync_global_to_guest(vm, kvm_pmu_version);
-
-       /*
-        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
-        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
-        */
-       if (kvm_has_perf_caps)
-               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
-
-       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
-       return vm;
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       do {
-               vcpu_run(vcpu);
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_PRINTF:
-                       pr_info("%s", uc.buffer);
-                       break;
-               case UCALL_DONE:
-                       break;
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       } while (uc.cmd != UCALL_DONE);
-}
-
-static uint8_t guest_get_pmu_version(void)
-{
-       /*
-        * Return the effective PMU version, i.e. the minimum between what KVM
-        * supports and what is enumerated to the guest.  The host deliberately
-        * advertises a PMU version to the guest beyond what is actually
-        * supported by KVM to verify KVM doesn't freak out and do something
-        * bizarre with an architecturally valid, but unsupported, version.
-        */
-       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
-}
-
-/*
- * If an architectural event is supported and guaranteed to generate at least
- * one "hit, assert that its count is non-zero.  If an event isn't supported or
- * the test can't guarantee the associated action will occur, then all bets are
- * off regarding the count, i.e. no checks can be done.
- *
- * Sanity check that in all cases, the event doesn't count when it's disabled,
- * and that KVM correctly emulates the write of an arbitrary value.
- */
-static void guest_assert_event_count(uint8_t idx,
-                                    struct kvm_x86_pmu_feature event,
-                                    uint32_t pmc, uint32_t pmc_msr)
-{
-       uint64_t count;
-
-       count = _rdpmc(pmc);
-       if (!this_pmu_has(event))
-               goto sanity_checks;
-
-       switch (idx) {
-       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
-               break;
-       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
-               GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
-               break;
-       case INTEL_ARCH_LLC_REFERENCES_INDEX:
-       case INTEL_ARCH_LLC_MISSES_INDEX:
-               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
-                   !this_cpu_has(X86_FEATURE_CLFLUSH))
-                       break;
-               fallthrough;
-       case INTEL_ARCH_CPU_CYCLES_INDEX:
-       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
-               GUEST_ASSERT_NE(count, 0);
-               break;
-       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
-               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
-               break;
-       default:
-               break;
-       }
-
-sanity_checks:
-       __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
-       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
-
-       wrmsr(pmc_msr, 0xdead);
-       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
-}
-
-/*
- * Enable and disable the PMC in a monolithic asm blob to ensure that the
- * compiler can't insert _any_ code into the measured sequence.  Note, ECX
- * doesn't need to be clobbered as the input value, @pmc_msr, is restored
- * before the end of the sequence.
- *
- * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
- * misses, i.e. to allow testing that those events actually count.
- *
- * If forced emulation is enabled (and specified), force emulation on a subset
- * of the measured code to verify that KVM correctly emulates instructions and
- * branches retired events in conjunction with hardware also counting said
- * events.
- */
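-/*
- * Note, @_msr is passed in both ECX and EDI: the first WRMSR programs the
- * event via the control MSR in ECX, the loop then reuses ECX as its counter,
- * and the trailing "mov %edi, %ecx" restores the MSR index so that the final
- * WRMSR (with EAX=EDX=0) stops the counter.
- */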
-#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
-do {                                                                           \
-       __asm__ __volatile__("wrmsr\n\t"                                        \
-                            " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"      \
-                            "1:\n\t"                                           \
-                            clflush "\n\t"                                     \
-                            "mfence\n\t"                                       \
-                            FEP "loop 1b\n\t"                                  \
-                            FEP "mov %%edi, %%ecx\n\t"                         \
-                            FEP "xor %%eax, %%eax\n\t"                         \
-                            FEP "xor %%edx, %%edx\n\t"                         \
-                            "wrmsr\n\t"                                        \
-                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
-                               "c"(_msr), "D"(_msr)                            \
-       );                                                                      \
-} while (0)
-
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
-do {                                                                           \
-       wrmsr(_pmc_msr, 0);                                                     \
-                                                                               \
-       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);    \
-       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);       \
-       else                                                                    \
-               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
-                                                                               \
-       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
-} while (0)
-
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
-                                   uint32_t pmc, uint32_t pmc_msr,
-                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
-{
-       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
-
-       if (is_forced_emulation_enabled)
-               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL                                           \
-({                                                                     \
-       struct kvm_x86_pmu_feature feature = {};                        \
-                                                                       \
-       feature;                                                        \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
-       return !(*(u64 *)&event);
-}
-
-static void guest_test_arch_event(uint8_t idx)
-{
-       const struct {
-               struct kvm_x86_pmu_feature gp_event;
-               struct kvm_x86_pmu_feature fixed_event;
-       } intel_event_to_feature[] = {
-               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
-               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
-               /*
-                * Note, the fixed counter for reference cycles is NOT the same
-                * as the general purpose architectural event.  The fixed counter
-                * explicitly counts at the same frequency as the TSC, whereas
-                * the GP event counts at a fixed, but uarch specific, frequency.
-                * Bundle them here for simplicity.
-                */
-               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
-               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
-               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
-       };
-
-       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-       uint32_t pmu_version = guest_get_pmu_version();
-       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
-       bool guest_has_perf_global_ctrl = pmu_version >= 2;
-       struct kvm_x86_pmu_feature gp_event, fixed_event;
-       uint32_t base_pmc_msr;
-       unsigned int i;
-
-       /* The host side shouldn't invoke this without a guest PMU. */
-       GUEST_ASSERT(pmu_version);
-
-       if (this_cpu_has(X86_FEATURE_PDCM) &&
-           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
-               base_pmc_msr = MSR_IA32_PMC0;
-       else
-               base_pmc_msr = MSR_IA32_PERFCTR0;
-
-       gp_event = intel_event_to_feature[idx].gp_event;
-       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
-
-       GUEST_ASSERT(nr_gp_counters);
-
-       for (i = 0; i < nr_gp_counters; i++) {
-               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
-                                   ARCH_PERFMON_EVENTSEL_ENABLE |
-                                   intel_pmu_arch_events[idx];
-
-               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
-               if (guest_has_perf_global_ctrl)
-                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
-
-               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
-                                       MSR_P6_EVNTSEL0 + i, eventsel);
-       }
-
-       if (!guest_has_perf_global_ctrl)
-               return;
-
-       fixed_event = intel_event_to_feature[idx].fixed_event;
-       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
-               return;
-
-       i = fixed_event.f.bit;
-
-       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-
-       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
-                               MSR_CORE_PERF_FIXED_CTR0 + i,
-                               MSR_CORE_PERF_GLOBAL_CTRL,
-                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-}
-
-static void guest_test_arch_events(void)
-{
-       uint8_t i;
-
-       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
-               guest_test_arch_event(i);
-
-       GUEST_DONE();
-}
-
-static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
-                            uint8_t length, uint8_t unavailable_mask)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       /* Testing arch events requires a vPMU (there are no negative tests). */
-       if (!pmu_version)
-               return;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
-                               length);
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
-                               unavailable_mask);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-/*
- * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
- * that aren't defined as counter MSRs *probably* don't exist, but there's no
- * guarantee that currently undefined MSR indices won't be used for something
- * other than PMCs in the future.
- */
-#define MAX_NR_GP_COUNTERS     8
-#define MAX_NR_FIXED_COUNTERS  3
-
-#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
-__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
-              "Expected %s on " #insn "(0x%x), got vector %u",                 \
-              expect_gp ? "#GP" : "no fault", msr, vector)                     \
-
-#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val)                   \
-       __GUEST_ASSERT(val == expected_val,                                     \
-                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
-                      msr, expected_val, val);
-
-static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
-                            uint64_t expected_val)
-{
-       uint8_t vector;
-       uint64_t val;
-
-       vector = rdpmc_safe(rdpmc_idx, &val);
-       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
-       if (expect_success)
-               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-
-       if (!is_forced_emulation_enabled)
-               return;
-
-       vector = rdpmc_safe_fep(rdpmc_idx, &val);
-       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
-       if (expect_success)
-               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
-}
-
-static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
-                                uint8_t nr_counters, uint32_t or_mask)
-{
-       const bool pmu_has_fast_mode = !guest_get_pmu_version();
-       uint8_t i;
-
-       for (i = 0; i < nr_possible_counters; i++) {
-               /*
-                * TODO: Test a value that validates full-width writes and the
-                * width of the counters.
-                */
-               const uint64_t test_val = 0xffff;
-               const uint32_t msr = base_msr + i;
-
-               /*
-                * Fixed counters are supported if the counter is less than the
-                * number of enumerated contiguous counters *or* the counter is
-                * explicitly enumerated in the supported counters mask.
-                */
-               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
-
-               /*
-                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
-                * unsupported, i.e. doesn't #GP and reads back '0'.
-                */
-               const uint64_t expected_val = expect_success ? test_val : 0;
-               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
-                                      msr != MSR_P6_PERFCTR1;
-               uint32_t rdpmc_idx;
-               uint8_t vector;
-               uint64_t val;
-
-               vector = wrmsr_safe(msr, test_val);
-               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
-
-               vector = rdmsr_safe(msr, &val);
-               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
-
-               /* On #GP, the result of RDMSR is undefined. */
-               if (!expect_gp)
-                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
-
-               /*
-                * Redo the read tests with RDPMC, which has different indexing
-                * semantics and additional capabilities.
-                */
-               rdpmc_idx = i;
-               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
-                       rdpmc_idx |= INTEL_RDPMC_FIXED;
-
-               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
-
-               /*
-                * KVM doesn't support non-architectural PMUs, i.e. it should be
-                * impossible to have fast mode RDPMC.  Verify that attempting
-                * to use fast RDPMC always #GPs.
-                */
-               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
-               rdpmc_idx |= INTEL_RDPMC_FAST;
-               guest_test_rdpmc(rdpmc_idx, false, -1ull);
-
-               vector = wrmsr_safe(msr, 0);
-               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
-       }
-}
-
-static void guest_test_gp_counters(void)
-{
-       uint8_t pmu_version = guest_get_pmu_version();
-       uint8_t nr_gp_counters = 0;
-       uint32_t base_msr;
-
-       if (pmu_version)
-               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-
-       /*
-        * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
-        * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
-        * of GP counters.  If there are no GP counters, require KVM to leave
-        * PERF_GLOBAL_CTRL '0'.  This edge case isn't covered by the SDM, but
-        * follow the spirit of the architecture and only globally enable GP
-        * counters, of which there are none.
-        */
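-       /* E.g. with 8 GP counters, the expected post-RESET value is 0xff. */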
-       if (pmu_version > 1) {
-               uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
-
-               if (nr_gp_counters)
-                       GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
-               else
-                       GUEST_ASSERT_EQ(global_ctrl, 0);
-       }
-
-       if (this_cpu_has(X86_FEATURE_PDCM) &&
-           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
-               base_msr = MSR_IA32_PMC0;
-       else
-               base_msr = MSR_IA32_PERFCTR0;
-
-       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
-       GUEST_DONE();
-}
-
-static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-                            uint8_t nr_gp_counters)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
-                               nr_gp_counters);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void guest_test_fixed_counters(void)
-{
-       uint64_t supported_bitmask = 0;
-       uint8_t nr_fixed_counters = 0;
-       uint8_t i;
-
-       /* Fixed counters require Architectural vPMU Version 2+. */
-       if (guest_get_pmu_version() >= 2)
-               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-
-       /*
-        * The supported bitmask for fixed counters was introduced in PMU
-        * version 5.
-        */
-       if (guest_get_pmu_version() >= 5)
-               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
-
-       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
-                            nr_fixed_counters, supported_bitmask);
-
-       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
-               uint8_t vector;
-               uint64_t val;
-
-               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
-                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
-                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-                       __GUEST_ASSERT(vector == GP_VECTOR,
-                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
-
-                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
-                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-                       __GUEST_ASSERT(vector == GP_VECTOR,
-                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
-                       continue;
-               }
-
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-               __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
-
-               GUEST_ASSERT_NE(val, 0);
-       }
-       GUEST_DONE();
-}
-
-static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-                               uint8_t nr_fixed_counters,
-                               uint32_t supported_bitmask)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
-                                        pmu_version, perf_capabilities);
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
-                               supported_bitmask);
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
-                               nr_fixed_counters);
-
-       run_vcpu(vcpu);
-
-       kvm_vm_free(vm);
-}
-
-static void test_intel_counters(void)
-{
-       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
-       unsigned int i;
-       uint8_t v, j;
-       uint32_t k;
-
-       const uint64_t perf_caps[] = {
-               0,
-               PMU_CAP_FW_WRITES,
-       };
-
-       /*
-        * Test up to PMU v5, which is the current maximum version defined by
-        * Intel, i.e. the last version that is guaranteed to be backwards
-        * compatible with KVM's existing behavior.
-        */
-       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
-
-       /*
-        * Detect the existence of events that aren't supported by selftests.
-        * This will (obviously) fail any time the kernel adds support for a
-        * new event, but it's worth paying that price to keep the test fresh.
-        */
-       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
-                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
-                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
-
-       /*
-        * Force iterating over known arch events regardless of whether or not
-        * KVM/hardware supports a given event.
-        */
-       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
-
-       for (v = 0; v <= max_pmu_version; v++) {
-               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
-                       if (!kvm_has_perf_caps && perf_caps[i])
-                               continue;
-
-                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       /*
-                        * To keep the total runtime reasonable, test every
-                        * possible non-zero, non-reserved bitmap combination
-                        * only with the native PMU version and the full bit
-                        * vector length.
-                        */
-                       if (v == pmu_version) {
-                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
-                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
-                       }
-                       /*
-                        * Test single bits for all PMU versions and lengths up
-                        * to the number of events + 1 (to verify KVM doesn't do
-                        * weird things if the guest length is greater than the
-                        * host length).  Explicitly test a mask of '0' and all
-                        * ones, i.e. all events being available and unavailable.
-                        */
-                       for (j = 0; j <= nr_arch_events + 1; j++) {
-                               test_arch_events(v, perf_caps[i], j, 0);
-                               test_arch_events(v, perf_caps[i], j, 0xff);
-
-                               for (k = 0; k < nr_arch_events; k++)
-                                       test_arch_events(v, perf_caps[i], j, BIT(k));
-                       }
-
-                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       for (j = 0; j <= nr_gp_counters; j++)
-                               test_gp_counters(v, perf_caps[i], j);
-
-                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
-                               v, perf_caps[i]);
-                       for (j = 0; j <= nr_fixed_counters; j++) {
-                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
-                                       test_fixed_counters(v, perf_caps[i], j, k);
-                       }
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-
-       TEST_REQUIRE(host_cpu_is_intel);
-       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
-       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
-       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
-       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
-
-       test_intel_counters();
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
deleted file mode 100644 (file)
index c15513c..0000000
+++ /dev/null
@@ -1,876 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for x86 KVM_SET_PMU_EVENT_FILTER.
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Verifies the expected behavior of allow lists and deny lists for
- * virtual PMU events.
- */
-#include "kvm_util.h"
-#include "pmu.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define NUM_BRANCHES 42
-#define MAX_TEST_EVENTS                10
-
-#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
-#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
-#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
-
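-/*
- * Local mirror of the UAPI struct kvm_pmu_event_filter, with the flexible
- * events[] array given a fixed size so that filters can be built on the stack
- * and passed directly to KVM_SET_PMU_EVENT_FILTER.
- */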
-struct __kvm_pmu_event_filter {
-       __u32 action;
-       __u32 nevents;
-       __u32 fixed_counter_bitmap;
-       __u32 flags;
-       __u32 pad[4];
-       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-};
-
-/*
- * This event list comprises Intel's known architectural events, plus AMD's
- * Branch Instructions Retired for Zen CPUs.  Note, AMD and Intel use the
- * same encoding for Instructions Retired.
- */
-kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
-
-static const struct __kvm_pmu_event_filter base_event_filter = {
-       .nevents = ARRAY_SIZE(base_event_filter.events),
-       .events = {
-               INTEL_ARCH_CPU_CYCLES,
-               INTEL_ARCH_INSTRUCTIONS_RETIRED,
-               INTEL_ARCH_REFERENCE_CYCLES,
-               INTEL_ARCH_LLC_REFERENCES,
-               INTEL_ARCH_LLC_MISSES,
-               INTEL_ARCH_BRANCHES_RETIRED,
-               INTEL_ARCH_BRANCHES_MISPREDICTED,
-               INTEL_ARCH_TOPDOWN_SLOTS,
-               AMD_ZEN_BRANCHES_RETIRED,
-       },
-};
-
-struct {
-       uint64_t loads;
-       uint64_t stores;
-       uint64_t loads_stores;
-       uint64_t branches_retired;
-       uint64_t instructions_retired;
-} pmc_results;
-
-/*
- * If we encounter a #GP during the guest PMU sanity check, then the guest
- * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
- */
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       GUEST_SYNC(-EFAULT);
-}
-
-/*
- * Check that we can write a new value to the given MSR and read it back.
- * The caller should provide a non-empty set of bits that are safe to flip.
- *
- * Return on success. GUEST_SYNC(-EIO) on error.
- */
-static void check_msr(uint32_t msr, uint64_t bits_to_flip)
-{
-       uint64_t v = rdmsr(msr) ^ bits_to_flip;
-
-       wrmsr(msr, v);
-       if (rdmsr(msr) != v)
-               GUEST_SYNC(-EIO);
-
-       v ^= bits_to_flip;
-       wrmsr(msr, v);
-       if (rdmsr(msr) != v)
-               GUEST_SYNC(-EIO);
-}
-
-static void run_and_measure_loop(uint32_t msr_base)
-{
-       const uint64_t branches_retired = rdmsr(msr_base + 0);
-       const uint64_t insn_retired = rdmsr(msr_base + 1);
-
-       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
-
-       pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
-       pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
-}
-
-static void intel_guest_code(void)
-{
-       check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
-       check_msr(MSR_P6_EVNTSEL0, 0xffff);
-       check_msr(MSR_IA32_PMC0, 0xffff);
-       GUEST_SYNC(0);
-
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
-               wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-
-               run_and_measure_loop(MSR_IA32_PMC0);
-               GUEST_SYNC(0);
-       }
-}
-
-/*
- * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
- * this code uses the always-available, legacy K7 PMU MSRs, which alias to
- * the first four of the six extended core PMU MSRs.
- */
-static void amd_guest_code(void)
-{
-       check_msr(MSR_K7_EVNTSEL0, 0xffff);
-       check_msr(MSR_K7_PERFCTR0, 0xffff);
-       GUEST_SYNC(0);
-
-       for (;;) {
-               wrmsr(MSR_K7_EVNTSEL0, 0);
-               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
-               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
-
-               run_and_measure_loop(MSR_K7_PERFCTR0);
-               GUEST_SYNC(0);
-       }
-}
-
-/*
- * Run the VM to the next GUEST_SYNC(value), and return the value passed
- * to the sync. Any other exit from the guest is fatal.
- */
-static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       get_ucall(vcpu, &uc);
-       TEST_ASSERT(uc.cmd == UCALL_SYNC,
-                   "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
-       return uc.args[1];
-}
-
-static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
-{
-       uint64_t r;
-
-       memset(&pmc_results, 0, sizeof(pmc_results));
-       sync_global_to_guest(vcpu->vm, pmc_results);
-
-       r = run_vcpu_to_sync(vcpu);
-       TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
-
-       sync_global_from_guest(vcpu->vm, pmc_results);
-}
-
-/*
- * In a nested environment or if the vPMU is disabled, the guest PMU
- * might not work as architected (accessing the PMU MSRs may raise
- * #GP, or writes could simply be discarded). In those situations,
- * there is no point in running these tests. The guest code will perform
- * a sanity check and then GUEST_SYNC(success). In the case of failure,
- * the behavior of the guest on resumption is undefined.
- */
-static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
-{
-       uint64_t r;
-
-       vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
-       r = run_vcpu_to_sync(vcpu);
-       vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
-
-       return !r;
-}
-
-/*
- * Remove the first occurrence of 'event' (if any) from the filter's
- * event list.
- */
-static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
-{
-       bool found = false;
-       int i;
-
-       for (i = 0; i < f->nevents; i++) {
-               if (found)
-                       f->events[i - 1] = f->events[i];
-               else
-                       found = f->events[i] == event;
-       }
-       if (found)
-               f->nevents--;
-}
-
-#define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
-do {                                                                                   \
-       uint64_t br = pmc_results.branches_retired;                                     \
-       uint64_t ir = pmc_results.instructions_retired;                                 \
-                                                                                       \
-       if (br && br != NUM_BRANCHES)                                                   \
-               pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
-                       __func__, br, NUM_BRANCHES);                                    \
-       TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \
-                   __func__, br);                                                      \
-       TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",                \
-                   __func__, ir);                                                      \
-} while (0)
-
-#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()                                         \
-do {                                                                                   \
-       uint64_t br = pmc_results.branches_retired;                                     \
-       uint64_t ir = pmc_results.instructions_retired;                                 \
-                                                                                       \
-       TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",          \
-                   __func__, br);                                                      \
-       TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",                 \
-                   __func__, ir);                                                      \
-} while (0)
-
-static void test_without_filter(struct kvm_vcpu *vcpu)
-{
-       run_vcpu_and_sync_pmc_results(vcpu);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_with_filter(struct kvm_vcpu *vcpu,
-                            struct __kvm_pmu_event_filter *__f)
-{
-       struct kvm_pmu_event_filter *f = (void *)__f;
-
-       vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-       run_vcpu_and_sync_pmc_results(vcpu);
-}
-
-static void test_amd_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = {
-               .action = KVM_PMU_EVENT_DENY,
-               .nevents = 1,
-               .events = {
-                       RAW_EVENT(0x1C2, 0),
-               },
-       };
-
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_DENY;
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-static void test_member_allow_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_ALLOW;
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_DENY;
-
-       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
-       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
-       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_COUNTING_INSTRUCTIONS();
-}
-
-static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = KVM_PMU_EVENT_ALLOW;
-
-       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
-       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
-       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
-       test_with_filter(vcpu, &f);
-
-       ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
-}
-
-/*
- * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
- *
- * Note that KVM_CAP_PMU_CAPABILITY must be enabled prior to creating any vCPUs.
- */
-static void test_pmu_config_disable(void (*guest_code)(void))
-{
-       struct kvm_vcpu *vcpu;
-       int r;
-       struct kvm_vm *vm;
-
-       r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
-       if (!(r & KVM_PMU_CAP_DISABLE))
-               return;
-
-       vm = vm_create(1);
-
-       vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
-
-       vcpu = vm_vcpu_add(vm, 0, guest_code);
-       TEST_ASSERT(!sanity_check_pmu(vcpu),
-                   "Guest should not be able to use disabled PMU.");
-
-       kvm_vm_free(vm);
-}
-
-/*
- * On Intel, check for a non-zero PMU version, at least one general-purpose
- * counter per logical processor, and support for counting the number of branch
- * instructions retired.
- */
-static bool use_intel_pmu(void)
-{
-       return host_cpu_is_intel &&
-              kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
-              kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
-              kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
-}
-
-/*
- * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
- * 0xc2,0 for Branch Instructions Retired.
- */
-static bool use_amd_pmu(void)
-{
-       return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
-}
-
-/*
- * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
- * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
- * supported on Intel Xeon processors:
- *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
- */
-#define MEM_INST_RETIRED               0xD0
-#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
-#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
-#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
-
-static bool supports_event_mem_inst_retired(void)
-{
-       uint32_t eax, ebx, ecx, edx;
-
-       cpuid(1, &eax, &ebx, &ecx, &edx);
-       if (x86_family(eax) == 0x6) {
-               switch (x86_model(eax)) {
-               /* Sapphire Rapids */
-               case 0x8F:
-               /* Ice Lake */
-               case 0x6A:
-               /* Skylake */
-               /* Cascade Lake */
-               case 0x55:
-                       return true;
-               }
-       }
-
-       return false;
-}
-
-/*
- * "LS Dispatch", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- */
-#define LS_DISPATCH            0x29
-#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
-#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
-#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
-
-#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
-       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
-#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
-       KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
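-
-/*
- * A masked entry matches a programmed event when the event select matches and
- * (unit mask & mask) == match; entries built with EXCLUDE_MASKED_ENTRY() carve
- * matching unit masks back out of what the INCLUDE entries allow (see
- * test_cases[] below).
- */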
-
-static void masked_events_guest_test(uint32_t msr_base)
-{
-       /*
-        * The actual values of the counters don't determine the outcome of
-        * the test, only whether they are zero or non-zero.
-        */
-       const uint64_t loads = rdmsr(msr_base + 0);
-       const uint64_t stores = rdmsr(msr_base + 1);
-       const uint64_t loads_stores = rdmsr(msr_base + 2);
-       int val;
-
-
-       __asm__ __volatile__("movl $0, %[v];"
-                            "movl %[v], %%eax;"
-                            "incl %[v];"
-                            : [v]"+m"(val) :: "eax");
-
-       pmc_results.loads = rdmsr(msr_base + 0) - loads;
-       pmc_results.stores = rdmsr(msr_base + 1) - stores;
-       pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
-}
-
-static void intel_masked_events_guest_code(void)
-{
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-               wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
-               wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
-               wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
-
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
-
-               masked_events_guest_test(MSR_IA32_PMC0);
-               GUEST_SYNC(0);
-       }
-}
-
-static void amd_masked_events_guest_code(void)
-{
-       for (;;) {
-               wrmsr(MSR_K7_EVNTSEL0, 0);
-               wrmsr(MSR_K7_EVNTSEL1, 0);
-               wrmsr(MSR_K7_EVNTSEL2, 0);
-
-               wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
-               wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
-               wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
-
-               masked_events_guest_test(MSR_K7_PERFCTR0);
-               GUEST_SYNC(0);
-       }
-}
-
-static void run_masked_events_test(struct kvm_vcpu *vcpu,
-                                  const uint64_t masked_events[],
-                                  const int nmasked_events)
-{
-       struct __kvm_pmu_event_filter f = {
-               .nevents = nmasked_events,
-               .action = KVM_PMU_EVENT_ALLOW,
-               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-       };
-
-       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
-       test_with_filter(vcpu, &f);
-}
-
-#define ALLOW_LOADS            BIT(0)
-#define ALLOW_STORES           BIT(1)
-#define ALLOW_LOADS_STORES     BIT(2)
-
-struct masked_events_test {
-       uint64_t intel_events[MAX_TEST_EVENTS];
-       uint64_t intel_event_end;
-       uint64_t amd_events[MAX_TEST_EVENTS];
-       uint64_t amd_event_end;
-       const char *msg;
-       uint32_t flags;
-};
-
-/*
- * These are the test cases for the masked events tests.
- *
- * For each test, the guest enables 3 PMU counters (loads, stores,
- * loads + stores).  The filter is then set in KVM with the masked events
- * provided.  The test then verifies that the counters agree with which
- * ones should be counting and which ones should be filtered.
- */
-const struct masked_events_test test_cases[] = {
-       {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
-               },
-               .msg = "Only allow loads.",
-               .flags = ALLOW_LOADS,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
-               },
-               .msg = "Only allow stores.",
-               .flags = ALLOW_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
-               },
-               .msg = "Only allow loads + stores.",
-               .flags = ALLOW_LOADS_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
-               },
-               .msg = "Only allow loads and stores.",
-               .flags = ALLOW_LOADS | ALLOW_STORES,
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-                       EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
-               },
-               .msg = "Only allow loads and loads + stores.",
-               .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-                       EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
-               },
-               .msg = "Only allow stores and loads + stores.",
-               .flags = ALLOW_STORES | ALLOW_LOADS_STORES
-       }, {
-               .intel_events = {
-                       INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
-               },
-               .amd_events = {
-                       INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
-               },
-               .msg = "Only allow loads, stores, and loads + stores.",
-               .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
-       },
-};
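-
-/*
- * Example: "Only allow loads and stores" uses the Intel entries
- * INCLUDE(MEM_INST_RETIRED, 0x7C, 0) and EXCLUDE(MEM_INST_RETIRED, 0xFF, 0x83).
- * All three unit masks (0x81, 0x82, 0x83) satisfy (umask & 0x7C) == 0, so the
- * INCLUDE entry admits them all, and the EXCLUDE entry then removes 0x83
- * (loads + stores), leaving only loads and stores counting.
- */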
-
-static int append_test_events(const struct masked_events_test *test,
-                             uint64_t *events, int nevents)
-{
-       const uint64_t *evts;
-       int i;
-
-       evts = use_intel_pmu() ? test->intel_events : test->amd_events;
-       for (i = 0; i < MAX_TEST_EVENTS; i++) {
-               if (evts[i] == 0)
-                       break;
-
-               events[nevents + i] = evts[i];
-       }
-
-       return nevents + i;
-}
-
-static bool bool_eq(bool a, bool b)
-{
-       return a == b;
-}
-
-static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
-                                   int nevents)
-{
-       int ntests = ARRAY_SIZE(test_cases);
-       int i, n;
-
-       for (i = 0; i < ntests; i++) {
-               const struct masked_events_test *test = &test_cases[i];
-
-               /* Do any test case events overflow MAX_TEST_EVENTS? */
-               assert(test->intel_event_end == 0);
-               assert(test->amd_event_end == 0);
-
-               n = append_test_events(test, events, nevents);
-
-               run_masked_events_test(vcpu, events, n);
-
-               TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
-                           bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
-                           bool_eq(pmc_results.loads_stores,
-                                   test->flags & ALLOW_LOADS_STORES),
-                           "%s  loads: %lu, stores: %lu, loads + stores: %lu",
-                           test->msg, pmc_results.loads, pmc_results.stores,
-                           pmc_results.loads_stores);
-       }
-}
-
-static void add_dummy_events(uint64_t *events, int nevents)
-{
-       int i;
-
-       for (i = 0; i < nevents; i++) {
-               int event_select = i % 0xFF;
-               bool exclude = ((i % 4) == 0);
-
-               if (event_select == MEM_INST_RETIRED ||
-                   event_select == LS_DISPATCH)
-                       event_select++;
-
-               events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
-                                                       0, exclude);
-       }
-}
-
-static void test_masked_events(struct kvm_vcpu *vcpu)
-{
-       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
-       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
-
-       /* Run the test cases against a sparse PMU event filter. */
-       run_masked_events_tests(vcpu, events, 0);
-
-       /* Run the test cases against a dense PMU event filter. */
-       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
-       run_masked_events_tests(vcpu, events, nevents);
-}
-
-static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
-                               struct __kvm_pmu_event_filter *__f)
-{
-       struct kvm_pmu_event_filter *f = (void *)__f;
-
-       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-}
-
-static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
-                                      uint32_t flags, uint32_t action)
-{
-       struct __kvm_pmu_event_filter f = {
-               .nevents = 1,
-               .flags = flags,
-               .action = action,
-               .events = {
-                       event,
-               },
-       };
-
-       return set_pmu_event_filter(vcpu, &f);
-}
-
-static void test_filter_ioctl(struct kvm_vcpu *vcpu)
-{
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       struct __kvm_pmu_event_filter f;
-       uint64_t e = ~0ul;
-       int r;
-
-       /*
-        * Unfortunately, event data with invalid bits set (i.e. bits other
-        * than eventsel+umask) is expected to be accepted when flags == 0.
-        */
-       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
-       r = set_pmu_single_event_filter(vcpu, e,
-                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-                                       KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
-
-       e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
-       r = set_pmu_single_event_filter(vcpu, e,
-                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
-                                       KVM_PMU_EVENT_ALLOW);
-       TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
-
-       f = base_event_filter;
-       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Set invalid action is expected to fail");
-
-       f = base_event_filter;
-       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Set invalid flags is expected to fail");
-
-       f = base_event_filter;
-       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
-
-       f = base_event_filter;
-       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
-       r = set_pmu_event_filter(vcpu, &f);
-       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
-}
-
-static void intel_run_fixed_counter_guest_code(uint8_t idx)
-{
-       for (;;) {
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
-
-               /* Only OS_EN bit is enabled for fixed counter[idx]. */
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
-               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
-       }
-}
-
-static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
-                                              uint32_t action, uint32_t bitmap)
-{
-       struct __kvm_pmu_event_filter f = {
-               .action = action,
-               .fixed_counter_bitmap = bitmap,
-       };
-       set_pmu_event_filter(vcpu, &f);
-
-       return run_vcpu_to_sync(vcpu);
-}
-
-static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
-                                                  uint32_t action,
-                                                  uint32_t bitmap)
-{
-       struct __kvm_pmu_event_filter f = base_event_filter;
-
-       f.action = action;
-       f.fixed_counter_bitmap = bitmap;
-       set_pmu_event_filter(vcpu, &f);
-
-       return run_vcpu_to_sync(vcpu);
-}
-
-static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
-                                       uint8_t nr_fixed_counters)
-{
-       unsigned int i;
-       uint32_t bitmap;
-       uint64_t count;
-
-       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
-                   "Invalid nr_fixed_counters");
-
-       /*
-        * Check that the fixed performance counter counts normally when KVM
-        * userspace doesn't set any PMU filter.
-        */
-       count = run_vcpu_to_sync(vcpu);
-       TEST_ASSERT(count, "Unexpected count value: %ld", count);
-
-       for (i = 0; i < BIT(nr_fixed_counters); i++) {
-               bitmap = i;
-               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
-                                                      bitmap);
-               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
-               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
-                                                      bitmap);
-               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
-
-               /*
-                * Check that fixed_counter_bitmap has higher priority than
-                * events[] when both are set.
-                */
-               count = test_set_gp_and_fixed_event_filter(vcpu,
-                                                          KVM_PMU_EVENT_ALLOW,
-                                                          bitmap);
-               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
-
-               count = test_set_gp_and_fixed_event_filter(vcpu,
-                                                          KVM_PMU_EVENT_DENY,
-                                                          bitmap);
-               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
-       }
-}
-
-static void test_fixed_counter_bitmap(void)
-{
-       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint8_t idx;
-
-       /*
-        * Check that pmu_event_filter works as expected when it's applied to
-        * fixed performance counters.
-        */
-       for (idx = 0; idx < nr_fixed_counters; idx++) {
-               vm = vm_create_with_one_vcpu(&vcpu,
-                                            intel_run_fixed_counter_guest_code);
-               vcpu_args_set(vcpu, 1, idx);
-               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
-               kvm_vm_free(vm);
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       void (*guest_code)(void);
-       struct kvm_vcpu *vcpu, *vcpu2 = NULL;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
-
-       TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
-       guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       TEST_REQUIRE(sanity_check_pmu(vcpu));
-
-       if (use_amd_pmu())
-               test_amd_deny_list(vcpu);
-
-       test_without_filter(vcpu);
-       test_member_deny_list(vcpu);
-       test_member_allow_list(vcpu);
-       test_not_member_deny_list(vcpu);
-       test_not_member_allow_list(vcpu);
-
-       if (use_intel_pmu() &&
-           supports_event_mem_inst_retired() &&
-           kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
-               vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
-       else if (use_amd_pmu())
-               vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
-
-       if (vcpu2)
-               test_masked_events(vcpu2);
-       test_filter_ioctl(vcpu);
-
-       kvm_vm_free(vm);
-
-       test_pmu_config_disable(guest_code);
-       test_fixed_counter_bitmap();
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
deleted file mode 100644 (file)
index 82a8d88..0000000
+++ /dev/null
@@ -1,483 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <limits.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/kvm_para.h>
-#include <linux/memfd.h>
-#include <linux/sizes.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define BASE_DATA_SLOT         10
-#define BASE_DATA_GPA          ((uint64_t)(1ull << 32))
-#define PER_CPU_DATA_SIZE      ((uint64_t)(SZ_2M + PAGE_SIZE))
-
-/* Horrific macro so that the line info is captured accurately :-( */
-#define memcmp_g(gpa, pattern,  size)                                                          \
-do {                                                                                           \
-       uint8_t *mem = (uint8_t *)gpa;                                                          \
-       size_t i;                                                                               \
-                                                                                               \
-       for (i = 0; i < size; i++)                                                              \
-               __GUEST_ASSERT(mem[i] == pattern,                                               \
-                              "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x",       \
-                              pattern, i, gpa + i, mem[i]);                                    \
-} while (0)
-
-static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
-{
-       size_t i;
-
-       for (i = 0; i < size; i++)
-               TEST_ASSERT(mem[i] == pattern,
-                           "Host expected 0x%x at gpa 0x%lx, got 0x%x",
-                           pattern, gpa + i, mem[i]);
-}
-
-/*
- * Run memory conversion tests with explicit conversion:
- * Execute a KVM hypercall to map/unmap a gpa range, which will cause a
- * userspace exit to back/unback private memory. Subsequent accesses by the
- * guest to the gpa range will not cause an exit to userspace.
- *
- * Test memory conversion scenarios with the following steps:
- * 1) Access private memory using private access and verify that memory contents
- *   are not visible to userspace.
- * 2) Convert memory to shared using explicit conversions and ensure that
- *   userspace is able to access the shared regions.
- * 3) Convert memory back to private using explicit conversions and ensure that
- *   userspace is again not able to access converted private regions.
- */
-
-#define GUEST_STAGE(o, s) { .offset = o, .size = s }
-
-enum ucall_syncs {
-       SYNC_SHARED,
-       SYNC_PRIVATE,
-};
-
-static void guest_sync_shared(uint64_t gpa, uint64_t size,
-                             uint8_t current_pattern, uint8_t new_pattern)
-{
-       GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
-}
-
-static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
-{
-       GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
-}
-
-/* Arbitrary values, KVM doesn't care about the attribute flags. */
-#define MAP_GPA_SET_ATTRIBUTES BIT(0)
-#define MAP_GPA_SHARED         BIT(1)
-#define MAP_GPA_DO_FALLOCATE   BIT(2)
-
-static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
-                         bool do_fallocate)
-{
-       uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
-
-       if (map_shared)
-               flags |= MAP_GPA_SHARED;
-       if (do_fallocate)
-               flags |= MAP_GPA_DO_FALLOCATE;
-       kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
-
-static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
-       guest_map_mem(gpa, size, true, do_fallocate);
-}
-
-static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
-{
-       guest_map_mem(gpa, size, false, do_fallocate);
-}
-
-struct {
-       uint64_t offset;
-       uint64_t size;
-} static const test_ranges[] = {
-       GUEST_STAGE(0, PAGE_SIZE),
-       GUEST_STAGE(0, SZ_2M),
-       GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
-       GUEST_STAGE(PAGE_SIZE, SZ_2M),
-       GUEST_STAGE(SZ_2M, PAGE_SIZE),
-};
-
-static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
-{
-       const uint8_t def_p = 0xaa;
-       const uint8_t init_p = 0xcc;
-       uint64_t j;
-       int i;
-
-       /* Memory should be shared by default. */
-       memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
-       memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
-       guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
-
-       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
-
-       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-               uint64_t gpa = base_gpa + test_ranges[i].offset;
-               uint64_t size = test_ranges[i].size;
-               uint8_t p1 = 0x11;
-               uint8_t p2 = 0x22;
-               uint8_t p3 = 0x33;
-               uint8_t p4 = 0x44;
-
-               /*
-                * Set the test region to pattern one to differentiate it from
-                * the data range as a whole (contains the initial pattern).
-                */
-               memset((void *)gpa, p1, size);
-
-               /*
-                * Convert to private, set and verify the private data, and
-                * then verify that the rest of the data (map shared) still
-                * holds the initial pattern, and that the host always sees the
-                * shared memory (initial pattern).  Unlike shared memory,
-                * punching a hole in private memory is destructive, i.e.
-                * previous values aren't guaranteed to be preserved.
-                */
-               guest_map_private(gpa, size, do_fallocate);
-
-               memset((void *)gpa, p2, size);
-               guest_sync_private(gpa, size, p1);
-
-               /*
-                * Verify that the private memory was set to pattern two, and
-                * that shared memory still holds the initial pattern.
-                */
-               memcmp_g(gpa, p2, size);
-               if (gpa > base_gpa)
-                       memcmp_g(base_gpa, init_p, gpa - base_gpa);
-               if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
-                       memcmp_g(gpa + size, init_p,
-                                (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
-
-               /*
-                * Convert odd-number page frames back to shared to verify KVM
-                * also correctly handles holes in private ranges.
-                */
-               for (j = 0; j < size; j += PAGE_SIZE) {
-                       if ((j >> PAGE_SHIFT) & 1) {
-                               guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
-                               guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
-
-                               memcmp_g(gpa + j, p3, PAGE_SIZE);
-                       } else {
-                               guest_sync_private(gpa + j, PAGE_SIZE, p1);
-                       }
-               }
-
-               /*
-                * Convert the entire region back to shared, explicitly write
-                * pattern three to fill in the even-number frames before
-                * asking the host to verify (and write pattern four).
-                */
-               guest_map_shared(gpa, size, do_fallocate);
-               memset((void *)gpa, p3, size);
-               guest_sync_shared(gpa, size, p3, p4);
-               memcmp_g(gpa, p4, size);
-
-               /* Reset the shared memory back to the initial pattern. */
-               memset((void *)gpa, init_p, size);
-
-               /*
-                * Free (via PUNCH_HOLE) *all* private memory so that the next
-                * iteration starts from a clean slate, e.g. with respect to
-                * whether or not there are pages/folios in guest_mem.
-                */
-               guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
-       }
-}
-
-static void guest_punch_hole(uint64_t gpa, uint64_t size)
-{
-       /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
-       uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
-
-       kvm_hypercall_map_gpa_range(gpa, size, flags);
-}
-
-/*
- * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
- * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
- * (subsequent fault) should zero memory.
- */
-static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
-{
-       const uint8_t init_p = 0xcc;
-       int i;
-
-       /*
-        * Convert the entire range to private, this testcase is all about
-        * punching holes in guest_memfd, i.e. shared mappings aren't needed.
-        */
-       guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
-
-       for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-               uint64_t gpa = base_gpa + test_ranges[i].offset;
-               uint64_t size = test_ranges[i].size;
-
-               /*
-                * Free all memory before each iteration, even for the !precise
-                * case where the memory will be faulted back in.  Freeing and
-                * reallocating should obviously work, and freeing all memory
-                * minimizes the probability of cross-testcase influence.
-                */
-               guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
-
-               /* Fault-in and initialize memory, and verify the pattern. */
-               if (precise) {
-                       memset((void *)gpa, init_p, size);
-                       memcmp_g(gpa, init_p, size);
-               } else {
-                       memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
-                       memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
-               }
-
-               /*
-                * Punch a hole at the target range and verify that reads from
-                * the guest succeed and return zeroes.
-                */
-               guest_punch_hole(gpa, size);
-               memcmp_g(gpa, 0, size);
-       }
-}
-
-static void guest_code(uint64_t base_gpa)
-{
-       /*
-        * Run the conversion test twice, with and without doing fallocate() on
-        * the guest_memfd backing when converting between shared and private.
-        */
-       guest_test_explicit_conversion(base_gpa, false);
-       guest_test_explicit_conversion(base_gpa, true);
-
-       /*
-        * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
-        * faulted in, once with only the target range faulted in.
-        */
-       guest_test_punch_hole(base_gpa, false);
-       guest_test_punch_hole(base_gpa, true);
-       GUEST_DONE();
-}
-
-static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-       uint64_t gpa = run->hypercall.args[0];
-       uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
-       bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
-       bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
-       bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
-       struct kvm_vm *vm = vcpu->vm;
-
-       TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
-                   "Wanted MAP_GPA_RANGE (%u), got '%llu'",
-                   KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
-
-       if (do_fallocate)
-               vm_guest_mem_fallocate(vm, gpa, size, map_shared);
-
-       if (set_attributes)
-               vm_set_memory_attributes(vm, gpa, size,
-                                        map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
-       run->hypercall.ret = 0;
-}
-
-static bool run_vcpus;
-
-static void *__test_mem_conversions(void *__vcpu)
-{
-       struct kvm_vcpu *vcpu = __vcpu;
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vm *vm = vcpu->vm;
-       struct ucall uc;
-
-       while (!READ_ONCE(run_vcpus))
-               ;
-
-       for ( ;; ) {
-               vcpu_run(vcpu);
-
-               if (run->exit_reason == KVM_EXIT_HYPERCALL) {
-                       handle_exit_hypercall(vcpu);
-                       continue;
-               }
-
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
-                           run->exit_reason, exit_reason_str(run->exit_reason));
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               case UCALL_SYNC: {
-                       uint64_t gpa = uc.args[1];
-                       size_t size = uc.args[2];
-                       size_t i;
-
-                       TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
-                                   uc.args[0] == SYNC_PRIVATE,
-                                   "Unknown sync command '%ld'", uc.args[0]);
-
-                       for (i = 0; i < size; i += vm->page_size) {
-                               size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
-                               uint8_t *hva = addr_gpa2hva(vm, gpa + i);
-
-                               /* In all cases, the host should observe the shared data. */
-                               memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
-
-                               /* For shared, write the new pattern to guest memory. */
-                               if (uc.args[0] == SYNC_SHARED)
-                                       memset(hva, uc.args[4], nr_bytes);
-                       }
-                       break;
-               }
-               case UCALL_DONE:
-                       return NULL;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-}
-
-static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
-                                uint32_t nr_memslots)
-{
-       /*
-        * Allocate enough memory so that each vCPU's chunk of memory can be
-        * naturally aligned with respect to the size of the backing store.
-        */
-       const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
-       const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
-       const size_t memfd_size = per_cpu_size * nr_vcpus;
-       const size_t slot_size = memfd_size / nr_memslots;
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-       pthread_t threads[KVM_MAX_VCPUS];
-       struct kvm_vm *vm;
-       int memfd, i, r;
-
-       const struct vm_shape shape = {
-               .mode = VM_MODE_DEFAULT,
-               .type = KVM_X86_SW_PROTECTED_VM,
-       };
-
-       TEST_ASSERT(slot_size * nr_memslots == memfd_size,
-                   "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
-                   memfd_size, nr_memslots);
-       vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
-
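-       /*
-        * Exit to userspace on KVM_HC_MAP_GPA_RANGE hypercalls so that the
-        * host can handle shared<=>private conversions.
-        */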
-       vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
-
-       memfd = vm_create_guest_memfd(vm, memfd_size, 0);
-
-       for (i = 0; i < nr_memslots; i++)
-               vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
-                          BASE_DATA_SLOT + i, slot_size / vm->page_size,
-                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
-
-       for (i = 0; i < nr_vcpus; i++) {
-               uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
-
-               vcpu_args_set(vcpus[i], 1, gpa);
-
-               /*
-                * Map only what is needed so that an out-of-bounds access
-                * results in a #PF => SHUTDOWN instead of data corruption.
-                */
-               virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
-
-               pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
-       }
-
-       WRITE_ONCE(run_vcpus, true);
-
-       for (i = 0; i < nr_vcpus; i++)
-               pthread_join(threads[i], NULL);
-
-       kvm_vm_free(vm);
-
-       /*
-        * Allocate and free memory from the guest_memfd after closing the VM
-        * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
-        * should prevent the VM from being fully destroyed until the last
-        * reference to the guest_memfd is also put.
-        */
-       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
-       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
-       r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
-       TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
-       close(memfd);
-}
-
-static void usage(const char *cmd)
-{
-       puts("");
-       printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
-       puts("");
-       backing_src_help("-s");
-       puts("");
-       puts(" -n: specify the number of vcpus (default: 1)");
-       puts("");
-       puts(" -m: specify the number of memslots (default: 1)");
-       puts("");
-}
-
-int main(int argc, char *argv[])
-{
-       enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
-       uint32_t nr_memslots = 1;
-       uint32_t nr_vcpus = 1;
-       int opt;
-
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
-       while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
-               switch (opt) {
-               case 's':
-                       src_type = parse_backing_src_type(optarg);
-                       break;
-               case 'n':
-                       nr_vcpus = atoi_positive("nr_vcpus", optarg);
-                       break;
-               case 'm':
-                       nr_memslots = atoi_positive("nr_memslots", optarg);
-                       break;
-               case 'h':
-               default:
-                       usage(argv[0]);
-                       exit(0);
-               }
-       }
-
-       test_mem_conversions(src_type, nr_vcpus, nr_memslots);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
deleted file mode 100644 (file)
index 13e72fc..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2023, Google LLC.
- */
-#include <linux/kvm.h>
-#include <pthread.h>
-#include <stdint.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-/* Arbitrarily selected to avoid overlaps with anything else */
-#define EXITS_TEST_GVA 0xc0000000
-#define EXITS_TEST_GPA EXITS_TEST_GVA
-#define EXITS_TEST_NPAGES 1
-#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
-#define EXITS_TEST_SLOT 10
-
-static uint64_t guest_repeatedly_read(void)
-{
-       volatile uint64_t value;
-
-       while (true)
-               value = *((uint64_t *) EXITS_TEST_GVA);
-
-       return value;
-}
-
-static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-       int r;
-
-       r = _vcpu_run(vcpu);
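-       /* KVM_RUN is expected to fail with -EFAULT on KVM_EXIT_MEMORY_FAULT. */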
-       if (r) {
-               TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
-               TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
-       }
-       return vcpu->run->exit_reason;
-}
-
-const struct vm_shape protected_vm_shape = {
-       .mode = VM_MODE_DEFAULT,
-       .type = KVM_X86_SW_PROTECTED_VM,
-};
-
-static void test_private_access_memslot_deleted(void)
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       pthread_t vm_thread;
-       void *thread_return;
-       uint32_t exit_reason;
-
-       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
-                                          guest_repeatedly_read);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
-                                   EXITS_TEST_NPAGES,
-                                   KVM_MEM_GUEST_MEMFD);
-
-       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
-       /* Request to access page privately */
-       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
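-       /*
-        * Run the vCPU in a separate thread so that the memslot can be
-        * deleted while the guest is actively accessing it.
-        */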
-       pthread_create(&vm_thread, NULL,
-                      (void *(*)(void *))run_vcpu_get_exit_reason,
-                      (void *)vcpu);
-
-       vm_mem_region_delete(vm, EXITS_TEST_SLOT);
-
-       pthread_join(vm_thread, &thread_return);
-       exit_reason = (uint32_t)(uint64_t)thread_return;
-
-       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
-       kvm_vm_free(vm);
-}
-
-static void test_private_access_memslot_not_private(void)
-{
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint32_t exit_reason;
-
-       vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
-                                          guest_repeatedly_read);
-
-       /* Add a non-private memslot (flags = 0) */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   EXITS_TEST_GPA, EXITS_TEST_SLOT,
-                                   EXITS_TEST_NPAGES, 0);
-
-       virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
-
-       /* Request to access page privately */
-       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
-
-       exit_reason = run_vcpu_get_exit_reason(vcpu);
-
-       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
-       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
-
-       test_private_access_memslot_deleted();
-       test_private_access_memslot_not_private();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
deleted file mode 100644 (file)
index cbc92a8..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test edge cases and race conditions in kvm_recalculate_apic_map().
- */
-
-#include <sys/ioctl.h>
-#include <pthread.h>
-#include <time.h>
-
-#include "processor.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "apic.h"
-
-#define TIMEOUT                5       /* seconds */
-
-#define LAPIC_DISABLED 0
-#define LAPIC_X2APIC   (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
-#define MAX_XAPIC_ID   0xff
-
-static void *race(void *arg)
-{
-       struct kvm_lapic_state lapic = {};
-       struct kvm_vcpu *vcpu = arg;
-
-       while (1) {
-               /* Trigger kvm_recalculate_apic_map(). */
-               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-       struct kvm_vcpu *vcpuN;
-       struct kvm_vm *vm;
-       pthread_t thread;
-       time_t t;
-       int i;
-
-       kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
-
-       /*
-        * Create the max number of vCPUs supported by selftests so that KVM
-        * has a decent amount of work to do when recalculating the map, i.e. to
-        * make the problematic window large enough to hit.
-        */
-       vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
-
-       /*
-        * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
-        * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
-        */
-       for (i = 0; i < KVM_MAX_VCPUS; i++)
-               vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
-
-       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
-
-       vcpuN = vcpus[KVM_MAX_VCPUS - 1];
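-       /*
-        * Toggle x2APIC on and off on the last vCPU to repeatedly trigger
-        * APIC map recalculations that race with the KVM_SET_LAPIC thread.
-        */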
-       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
-               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
-               vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
-       }
-
-       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
-       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
deleted file mode 100644 (file)
index 4991378..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test that KVM_SET_BOOT_CPU_ID works as intended
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "apic.h"
-
-static void guest_bsp_vcpu(void *arg)
-{
-       GUEST_SYNC(1);
-
-       GUEST_ASSERT_NE(get_bsp_flag(), 0);
-
-       GUEST_DONE();
-}
-
-static void guest_not_bsp_vcpu(void *arg)
-{
-       GUEST_SYNC(1);
-
-       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
-
-       GUEST_DONE();
-}
-
-static void test_set_invalid_bsp(struct kvm_vm *vm)
-{
-       unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
-       int r;
-
-       if (max_vcpu_id) {
-               r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
-               TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
-       }
-
-       r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
-       TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
-}
-
-static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
-{
-       int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
-                          (void *)(unsigned long)vcpu->id);
-
-       TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-       int stage;
-
-       for (stage = 0; stage < 2; stage++) {
-
-               vcpu_run(vcpu);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                                       uc.args[1] == stage + 1,
-                                       "Stage %d: Unexpected register values vmexit, got %lx",
-                                       stage + 1, (ulong)uc.args[1]);
-                       test_set_bsp_busy(vcpu, "while running vm");
-                       break;
-               case UCALL_DONE:
-                       TEST_ASSERT(stage == 1,
-                                       "Expected GUEST_DONE in stage 2, got stage %d",
-                                       stage);
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_ASSERT(false, "Unexpected exit: %s",
-                                   exit_reason_str(vcpu->run->exit_reason));
-               }
-       }
-}
-
-static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
-                               struct kvm_vcpu *vcpus[])
-{
-       struct kvm_vm *vm;
-       uint32_t i;
-
-       vm = vm_create(nr_vcpus);
-
-       test_set_invalid_bsp(vm);
-
-       vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
-
-       for (i = 0; i < nr_vcpus; i++)
-               vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
-                                                                guest_not_bsp_vcpu);
-       return vm;
-}
-
-static void run_vm_bsp(uint32_t bsp_vcpu_id)
-{
-       struct kvm_vcpu *vcpus[2];
-       struct kvm_vm *vm;
-
-       vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
-
-       run_vcpu(vcpus[0]);
-       run_vcpu(vcpus[1]);
-
-       kvm_vm_free(vm);
-}
-
-static void check_set_bsp_busy(void)
-{
-       struct kvm_vcpu *vcpus[2];
-       struct kvm_vm *vm;
-
-       vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
-
-       test_set_bsp_busy(vcpus[1], "after adding vcpu");
-
-       run_vcpu(vcpus[0]);
-       run_vcpu(vcpus[1]);
-
-       test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
-
-       run_vm_bsp(0);
-       run_vm_bsp(1);
-       run_vm_bsp(0);
-
-       check_set_bsp_busy();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
deleted file mode 100644 (file)
index c021c07..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_SET_SREGS tests
- *
- * Copyright (C) 2018, Google LLC.
- *
- * This is a regression test for the bug fixed by the following commit:
- * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
- *
- * That bug allowed a user-mode program that called the KVM_SET_SREGS
- * ioctl to put a VCPU's local APIC into an invalid state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit)                               \
-do {                                                                           \
-       struct kvm_sregs new;                                                   \
-       int rc;                                                                 \
-                                                                               \
-       /* Skip the sub-test if the feature/bit is supported. */                \
-       if (orig.cr & bit)                                                      \
-               break;                                                          \
-                                                                               \
-       memcpy(&new, &orig, sizeof(sregs));                                     \
-       new.cr |= bit;                                                          \
-                                                                               \
-       rc = _vcpu_sregs_set(vcpu, &new);                                       \
-       TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit);        \
-                                                                               \
-       /* Sanity check that KVM didn't change anything. */                     \
-       vcpu_sregs_get(vcpu, &new);                                             \
-       TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");   \
-} while (0)
-
-static uint64_t calc_supported_cr4_feature_bits(void)
-{
-       uint64_t cr4;
-
-       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
-             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
-             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
-       if (kvm_cpu_has(X86_FEATURE_UMIP))
-               cr4 |= X86_CR4_UMIP;
-       if (kvm_cpu_has(X86_FEATURE_LA57))
-               cr4 |= X86_CR4_LA57;
-       if (kvm_cpu_has(X86_FEATURE_VMX))
-               cr4 |= X86_CR4_VMXE;
-       if (kvm_cpu_has(X86_FEATURE_SMX))
-               cr4 |= X86_CR4_SMXE;
-       if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
-               cr4 |= X86_CR4_FSGSBASE;
-       if (kvm_cpu_has(X86_FEATURE_PCID))
-               cr4 |= X86_CR4_PCIDE;
-       if (kvm_cpu_has(X86_FEATURE_XSAVE))
-               cr4 |= X86_CR4_OSXSAVE;
-       if (kvm_cpu_has(X86_FEATURE_SMEP))
-               cr4 |= X86_CR4_SMEP;
-       if (kvm_cpu_has(X86_FEATURE_SMAP))
-               cr4 |= X86_CR4_SMAP;
-       if (kvm_cpu_has(X86_FEATURE_PKU))
-               cr4 |= X86_CR4_PKE;
-
-       return cr4;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t cr4;
-       int rc, i;
-
-       /*
-        * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
-        * use it to verify all supported CR4 bits can be set prior to defining
-        * the vCPU model, i.e. without doing KVM_SET_CPUID2.
-        */
-       vm = vm_create_barebones();
-       vcpu = __vm_vcpu_add(vm, 0);
-
-       vcpu_sregs_get(vcpu, &sregs);
-
-       sregs.cr0 = 0;
-       sregs.cr4 |= calc_supported_cr4_feature_bits();
-       cr4 = sregs.cr4;
-
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
-                   sregs.cr4, cr4);
-
-       /* Verify all unsupported features are rejected by KVM. */
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
-       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
-
-       for (i = 32; i < 64; i++)
-               TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
-
-       /* NW without CD is illegal, as is PG without PE. */
-       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
-       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
-
-       kvm_vm_free(vm);
-
-       /* Create a "real" VM and verify APIC_BASE can be set. */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       vcpu_sregs_get(vcpu, &sregs);
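-       /*
-        * Enabling x2APIC (bit 10) without the global APIC enable (bit 11) is
-        * an invalid combination, while the global enable alone is legal.
-        */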
-       sregs.apic_base = 1 << 10;
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
-                   sregs.apic_base);
-       sregs.apic_base = 1 << 11;
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
-                   sregs.apic_base);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
deleted file mode 100644 (file)
index 3fb967f..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "kselftest.h"
-
-#define SVM_SEV_FEAT_DEBUG_SWAP 32u
-
-/*
- * Some features may have hidden dependencies, or may only work
- * for certain VM types.  Err on the side of safety and don't
- * expect that all supported features can be passed one by one
- * to KVM_SEV_INIT2.
- *
- * (Well, right now there's only one...)
- */
-#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
-
-int kvm_fd;
-u64 supported_vmsa_features;
-bool have_sev_es;
-
-static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
-{
-       struct kvm_sev_cmd cmd = {
-               .id = cmd_id,
-               .data = (uint64_t)data,
-               .sev_fd = open_sev_dev_path_or_exit(),
-       };
-       int ret;
-
-       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
-       TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
-                   "%d failed: fw error: %d\n",
-                   cmd_id, cmd.error);
-
-       return ret;
-}
-
-static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones_type(vm_type);
-       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
-       TEST_ASSERT(ret == 0,
-                   "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
-                   ret, errno);
-       kvm_vm_free(vm);
-}
-
-static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
-{
-       struct kvm_vm *vm;
-       int ret;
-
-       vm = vm_create_barebones_type(vm_type);
-       ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "KVM_SEV_INIT2 should fail, %s.",
-                   msg);
-       kvm_vm_free(vm);
-}
-
-void test_vm_types(void)
-{
-       test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
-
-       /*
-        * TODO: check that unsupported types cannot be created.  Probably
-        * a separate selftest.
-        */
-       if (have_sev_es)
-               test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
-
-       test_init2_invalid(0, &(struct kvm_sev_init){},
-                          "VM type is KVM_X86_DEFAULT_VM");
-       if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
-               test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
-                                  "VM type is KVM_X86_SW_PROTECTED_VM");
-}
-
-void test_flags(uint32_t vm_type)
-{
-       int i;
-
-       for (i = 0; i < 32; i++)
-               test_init2_invalid(vm_type,
-                       &(struct kvm_sev_init){ .flags = BIT(i) },
-                       "invalid flag");
-}
-
-void test_features(uint32_t vm_type, uint64_t supported_features)
-{
-       int i;
-
-       for (i = 0; i < 64; i++) {
-               if (!(supported_features & BIT_ULL(i)))
-                       test_init2_invalid(vm_type,
-                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
-                               "unknown feature");
-               else if (KNOWN_FEATURES & BIT_ULL(i))
-                       test_init2(vm_type,
-                               &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       int kvm_fd = open_kvm_dev_path_or_exit();
-       bool have_sev;
-
-       TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
-                                          KVM_X86_SEV_VMSA_FEATURES) == 0);
-       kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
-                           KVM_X86_SEV_VMSA_FEATURES,
-                           &supported_vmsa_features);
-
-       have_sev = kvm_cpu_has(X86_FEATURE_SEV);
-       TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
-                   "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
-                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
-
-       TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
-       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
-       TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
-                   "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
-                   kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
-
-       test_vm_types();
-
-       test_flags(KVM_X86_SEV_VM);
-       if (have_sev_es)
-               test_flags(KVM_X86_SEV_ES_VM);
-
-       test_features(KVM_X86_SEV_VM, 0);
-       if (have_sev_es)
-               test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
deleted file mode 100644 (file)
index 0a6dfba..0000000
+++ /dev/null
@@ -1,397 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kvm.h>
-#include <linux/psp-sev.h>
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <pthread.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "sev.h"
-#include "kselftest.h"
-
-#define NR_MIGRATE_TEST_VCPUS 4
-#define NR_MIGRATE_TEST_VMS 3
-#define NR_LOCK_TESTING_THREADS 3
-#define NR_LOCK_TESTING_ITERATIONS 10000
-
-bool have_sev_es;
-
-static struct kvm_vm *sev_vm_create(bool es)
-{
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_barebones();
-       if (!es)
-               sev_vm_init(vm);
-       else
-               sev_es_vm_init(vm);
-
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(vm, i);
-
-       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
-
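-       /*
-        * SEV-ES requires LAUNCH_UPDATE_VMSA to encrypt the VMSA; migration
-        * is rejected without it (see test_sev_migrate_parameters()).
-        */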
-       if (es)
-               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-       return vm;
-}
-
-static struct kvm_vm *aux_vm_create(bool with_vcpus)
-{
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_barebones();
-       if (!with_vcpus)
-               return vm;
-
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(vm, i);
-
-       return vm;
-}
-
-static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
-}
-
-
-static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       int ret;
-
-       ret = __sev_migrate_from(dst, src);
-       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void test_sev_migrate_from(bool es)
-{
-       struct kvm_vm *src_vm;
-       struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
-       int i, ret;
-
-       src_vm = sev_vm_create(es);
-       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
-               dst_vms[i] = aux_vm_create(true);
-
-       /* Initial migration from the src to the first dst. */
-       sev_migrate_from(dst_vms[0], src_vm);
-
-       for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
-               sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
-
-       /* Migrate the guest back to the original VM. */
-       ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
-       TEST_ASSERT(ret == -1 && errno == EIO,
-                   "VM that was migrated from should be dead. ret %d, errno: %d", ret,
-                   errno);
-
-       kvm_vm_free(src_vm);
-       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
-               kvm_vm_free(dst_vms[i]);
-}
-
-struct locking_thread_input {
-       struct kvm_vm *vm;
-       struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
-};
-
-static void *locking_test_thread(void *arg)
-{
-       int i, j;
-       struct locking_thread_input *input = (struct locking_thread_input *)arg;
-
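-       /*
-        * Hammer migrations between the VMs; the return value is ignored, as
-        * most attempts are expected to fail, the goal is purely to stress
-        * KVM's locking.
-        */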
-       for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
-               j = i % NR_LOCK_TESTING_THREADS;
-               __sev_migrate_from(input->vm, input->source_vms[j]);
-       }
-
-       return NULL;
-}
-
-static void test_sev_migrate_locking(void)
-{
-       struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
-       pthread_t pt[NR_LOCK_TESTING_THREADS];
-       int i;
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
-               input[i].vm = sev_vm_create(/* es= */ false);
-               input[0].source_vms[i] = input[i].vm;
-       }
-       for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
-               memcpy(input[i].source_vms, input[0].source_vms,
-                      sizeof(input[i].source_vms));
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
-
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               pthread_join(pt[i], NULL);
-       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
-               kvm_vm_free(input[i].vm);
-}
-
-static void test_sev_migrate_parameters(void)
-{
-       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
-               *sev_es_vm_no_vmsa;
-       int ret;
-
-       vm_no_vcpu = vm_create_barebones();
-       vm_no_sev = aux_vm_create(true);
-       ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Migrations require SEV enabled. ret %d, errno: %d", ret,
-                   errno);
-
-       if (!have_sev_es)
-               goto out;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       sev_es_vm = sev_vm_create(/* es= */ true);
-       sev_es_vm_no_vmsa = vm_create_barebones();
-       sev_es_vm_init(sev_es_vm_no_vmsa);
-       __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
-
-       ret = __sev_migrate_from(sev_vm, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(sev_es_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
-               ret, errno);
-
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(sev_es_vm);
-       kvm_vm_free(sev_es_vm_no_vmsa);
-out:
-       kvm_vm_free(vm_no_vcpu);
-       kvm_vm_free(vm_no_sev);
-}
-
-static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
-}
-
-
-static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
-{
-       int ret;
-
-       ret = __sev_mirror_create(dst, src);
-       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
-}
-
-static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
-{
-       struct kvm_sev_guest_status status;
-       int cmd_id;
-
-       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
-               int ret;
-
-               /*
-                * These commands are allowed for mirror VMs, all others are
-                * not.
-                */
-               switch (cmd_id) {
-               case KVM_SEV_LAUNCH_UPDATE_VMSA:
-               case KVM_SEV_GUEST_STATUS:
-               case KVM_SEV_DBG_DECRYPT:
-               case KVM_SEV_DBG_ENCRYPT:
-                       continue;
-               default:
-                       break;
-               }
-
-               /*
-                * These commands should be disallowed before the data
-                * parameter is examined so NULL is OK here.
-                */
-               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
-               TEST_ASSERT(
-                       ret == -1 && errno == EINVAL,
-                       "Should not be able call command: %d. ret: %d, errno: %d",
-                       cmd_id, ret, errno);
-       }
-
-       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
-}
-
-static void test_sev_mirror(bool es)
-{
-       struct kvm_vm *src_vm, *dst_vm;
-       int i;
-
-       src_vm = sev_vm_create(es);
-       dst_vm = aux_vm_create(false);
-
-       sev_mirror_create(dst_vm, src_vm);
-
-       /* Check that we can complete creation of the mirror VM.  */
-       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
-               __vm_vcpu_add(dst_vm, i);
-
-       if (es)
-               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
-
-       verify_mirror_allowed_cmds(dst_vm);
-
-       kvm_vm_free(src_vm);
-       kvm_vm_free(dst_vm);
-}
-
-static void test_sev_mirror_parameters(void)
-{
-       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
-       int ret;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       vm_with_vcpu = aux_vm_create(true);
-       vm_no_vcpu = aux_vm_create(false);
-
-       ret = __sev_mirror_create(sev_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to self. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
-       TEST_ASSERT(ret == -1 && errno == EINVAL,
-                   "Copy context requires SEV enabled. ret %d, errno: %d", ret,
-                   errno);
-
-       ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
-               ret, errno);
-
-       if (!have_sev_es)
-               goto out;
-
-       sev_es_vm = sev_vm_create(/* es= */ true);
-       ret = __sev_mirror_create(sev_vm, sev_es_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       ret = __sev_mirror_create(sev_es_vm, sev_vm);
-       TEST_ASSERT(
-               ret == -1 && errno == EINVAL,
-               "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
-               ret, errno);
-
-       kvm_vm_free(sev_es_vm);
-
-out:
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(vm_with_vcpu);
-       kvm_vm_free(vm_no_vcpu);
-}
-
-static void test_sev_move_copy(void)
-{
-       struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
-                     *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
-
-       sev_vm = sev_vm_create(/* es= */ false);
-       dst_vm = aux_vm_create(true);
-       dst2_vm = aux_vm_create(true);
-       dst3_vm = aux_vm_create(true);
-       mirror_vm = aux_vm_create(false);
-       dst_mirror_vm = aux_vm_create(false);
-       dst2_mirror_vm = aux_vm_create(false);
-       dst3_mirror_vm = aux_vm_create(false);
-
-       sev_mirror_create(mirror_vm, sev_vm);
-
-       sev_migrate_from(dst_mirror_vm, mirror_vm);
-       sev_migrate_from(dst_vm, sev_vm);
-
-       sev_migrate_from(dst2_vm, dst_vm);
-       sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
-
-       sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
-       sev_migrate_from(dst3_vm, dst2_vm);
-
-       kvm_vm_free(dst_vm);
-       kvm_vm_free(sev_vm);
-       kvm_vm_free(dst2_vm);
-       kvm_vm_free(dst3_vm);
-       kvm_vm_free(mirror_vm);
-       kvm_vm_free(dst_mirror_vm);
-       kvm_vm_free(dst2_mirror_vm);
-       kvm_vm_free(dst3_mirror_vm);
-
-       /*
-        * Run a similar test, but destroy the mirrors before the mirrored VMs
-        * to ensure destruction is done safely.
-        */
-       sev_vm = sev_vm_create(/* es= */ false);
-       dst_vm = aux_vm_create(true);
-       mirror_vm = aux_vm_create(false);
-       dst_mirror_vm = aux_vm_create(false);
-
-       sev_mirror_create(mirror_vm, sev_vm);
-
-       sev_migrate_from(dst_mirror_vm, mirror_vm);
-       sev_migrate_from(dst_vm, sev_vm);
-
-       kvm_vm_free(mirror_vm);
-       kvm_vm_free(dst_mirror_vm);
-       kvm_vm_free(dst_vm);
-       kvm_vm_free(sev_vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
-       have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
-
-       if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
-               test_sev_migrate_from(/* es= */ false);
-               if (have_sev_es)
-                       test_sev_migrate_from(/* es= */ true);
-               test_sev_migrate_locking();
-               test_sev_migrate_parameters();
-               if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
-                       test_sev_move_copy();
-       }
-       if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
-               test_sev_mirror(/* es= */ false);
-               if (have_sev_es)
-                       test_sev_mirror(/* es= */ true);
-               test_sev_mirror_parameters();
-       }
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
deleted file mode 100644 (file)
index ae77698..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <math.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "linux/psp-sev.h"
-#include "sev.h"
-
-
-#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
-
-static void guest_sev_es_code(void)
-{
-       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
-
-       /*
-        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
-        * force "termination" to signal "done" via the GHCB MSR protocol.
-        */
-       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
-       __asm__ __volatile__("rep; vmmcall");
-}
-
-static void guest_sev_code(void)
-{
-       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
-       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
-
-       GUEST_DONE();
-}
-
-/* Stash state passed via VMSA before any compiled code runs.  */
-extern void guest_code_xsave(void);
-asm("guest_code_xsave:\n"
-    "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n"
-    "xor %edx, %edx\n"
-    "xsave (%rdi)\n"
-    "jmp guest_sev_es_code");
-
-static void compare_xsave(u8 *from_host, u8 *from_guest)
-{
-       int i;
-       bool bad = false;
-       for (i = 0; i < 4095; i++) {
-               if (from_host[i] != from_guest[i]) {
-                       printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
-                       bad = true;
-               }
-       }
-
-       if (bad)
-               abort();
-}
-
-static void test_sync_vmsa(uint32_t policy)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t gva;
-       void *hva;
-
-       double x87val = M_PI;
-       struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
-
-       vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
-       gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
-                                   MEM_REGION_TEST_DATA);
-       hva = addr_gva2hva(vm, gva);
-
-       vcpu_args_set(vcpu, 1, gva);
-
-       asm("fninit\n"
-           "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
-           "fldl %3\n"
-           "xsave (%2)\n"
-           "fstp %%st\n"
-           : "=m"(xsave)
-           : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
-           : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
-       vcpu_xsave_set(vcpu, &xsave);
-
-       vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
-
-       /* This page is shared, so make it decrypted.  */
-       memset(hva, 0, 4096);
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
-                   "Wanted SYSTEM_EVENT, got %s",
-                   exit_reason_str(vcpu->run->exit_reason));
-       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
-       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
-       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
-
-       compare_xsave((u8 *)&xsave, (u8 *)hva);
-
-       kvm_vm_free(vm);
-}
-
-static void test_sev(void *guest_code, uint64_t policy)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
-       vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
-
-       /* TODO: Validate the measurement is as expected. */
-       vm_sev_launch(vm, policy, NULL);
-
-       for (;;) {
-               vcpu_run(vcpu);
-
-               if (policy & SEV_POLICY_ES) {
-                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
-                                   "Wanted SYSTEM_EVENT, got %s",
-                                   exit_reason_str(vcpu->run->exit_reason));
-                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
-                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
-                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
-                       break;
-               }
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_SYNC:
-                       continue;
-               case UCALL_DONE:
-                       return;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected exit: %s",
-                                 exit_reason_str(vcpu->run->exit_reason));
-               }
-       }
-
-       kvm_vm_free(vm);
-}
-
-static void guest_shutdown_code(void)
-{
-       struct desc_ptr idt;
-
-       /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
-       memset(&idt, 0, sizeof(idt));
-       __asm__ __volatile__("lidt %0" :: "m"(idt));
-
-       __asm__ __volatile__("ud2");
-}
-
-static void test_sev_es_shutdown(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       uint32_t type = KVM_X86_SEV_ES_VM;
-
-       vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
-
-       vm_sev_launch(vm, SEV_POLICY_ES, NULL);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
-                   "Wanted SHUTDOWN, got %s",
-                   exit_reason_str(vcpu->run->exit_reason));
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       const u64 xf_mask = XFEATURE_MASK_X87_AVX;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
-       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
-       test_sev(guest_sev_code, 0);
-
-       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
-               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
-               test_sev(guest_sev_es_code, SEV_POLICY_ES);
-
-               test_sev_es_shutdown();
-
-               if (kvm_has_cap(KVM_CAP_XCRS) &&
-                   (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
-                       test_sync_vmsa(0);
-                       test_sync_vmsa(SEV_POLICY_NO_DBG);
-               }
-       }
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
deleted file mode 100644 (file)
index fabeead..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Test that KVM emulates instructions in response to EPT violations when
- * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
- */
-#include "flds_emulation.h"
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MAXPHYADDR 36
-
-#define MEM_REGION_GVA 0x0000123456789000
-#define MEM_REGION_GPA 0x0000000700000000
-#define MEM_REGION_SLOT        10
-#define MEM_REGION_SIZE PAGE_SIZE
-
-static void guest_code(bool tdp_enabled)
-{
-       uint64_t error_code;
-       uint64_t vector;
-
-       vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
-
-       /*
-        * When TDP is enabled, flds will trigger an emulation failure, exit to
-        * userspace, and then the selftest host "VMM" skips the instruction.
-        *
-        * When TDP is disabled, no instruction emulation is required so flds
-        * should generate #PF(RSVD).
-        */
-       if (tdp_enabled) {
-               GUEST_ASSERT(!vector);
-       } else {
-               GUEST_ASSERT_EQ(vector, PF_VECTOR);
-               GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       uint64_t *pte;
-       uint64_t *hva;
-       uint64_t gpa;
-       int rc;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
-
-       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
-
-       rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
-       TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
-       vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   MEM_REGION_GPA, MEM_REGION_SLOT,
-                                   MEM_REGION_SIZE / PAGE_SIZE, 0);
-       gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
-                                MEM_REGION_GPA, MEM_REGION_SLOT);
-       TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
-       virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
-       hva = addr_gpa2hva(vm, MEM_REGION_GPA);
-       memset(hva, 0, PAGE_SIZE);
-
-       pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
-       *pte |= BIT_ULL(MAXPHYADDR);
-
-       vcpu_run(vcpu);
-
-       /*
-        * When TDP is enabled, KVM must emulate in response to the guest physical
-        * address that is illegal from the guest's perspective, but is legal
-        * from hardware's perspective.  This should result in an emulation
-        * failure exit to userspace since KVM doesn't support emulating flds.
-        */
-       if (kvm_is_tdp_enabled()) {
-               handle_flds_emulation_failure_exit(vcpu);
-               vcpu_run(vcpu);
-       }
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
deleted file mode 100644 (file)
index 55c88d6..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for SMM.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-#include "svm_util.h"
-
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
-#define SMRAM_GPA 0x1000000
-#define SMRAM_STAGE 0xfe
-
-#define STR(x) #x
-#define XSTR(s) STR(s)
-
-#define SYNC_PORT 0xe
-#define DONE 0xff
-
-/*
- * This is compiled as normal 64-bit code, however, the SMI handler is executed
- * in real-address mode. To stay simple we're limiting ourselves to a
- * mode-independent subset of asm here.
- * The SMI handler always reports back the fixed stage SMRAM_STAGE.
- */
-uint8_t smi_handler[] = {
-       0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
-       0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
-       0x0f, 0xaa,           /* rsm */
-};
-
-static inline void sync_with_host(uint64_t phase)
-{
-       asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
-                    : "+a" (phase));
-}
-
-static void self_smi(void)
-{
-       x2apic_write_reg(APIC_ICR,
-                        APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
-}
-
-static void l2_guest_code(void)
-{
-       sync_with_host(8);
-
-       sync_with_host(10);
-
-       vmcall();
-}
-
-static void guest_code(void *arg)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
-       struct svm_test_data *svm = arg;
-       struct vmx_pages *vmx_pages = arg;
-
-       sync_with_host(1);
-
-       wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
-
-       sync_with_host(2);
-
-       self_smi();
-
-       sync_with_host(4);
-
-       if (arg) {
-               if (this_cpu_has(X86_FEATURE_SVM)) {
-                       generic_svm_setup(svm, l2_guest_code,
-                                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-               } else {
-                       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-                       GUEST_ASSERT(load_vmcs(vmx_pages));
-                       prepare_vmcs(vmx_pages, l2_guest_code,
-                                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-               }
-
-               sync_with_host(5);
-
-               self_smi();
-
-               sync_with_host(7);
-
-               if (this_cpu_has(X86_FEATURE_SVM)) {
-                       run_guest(svm->vmcb, svm->vmcb_gpa);
-                       run_guest(svm->vmcb, svm->vmcb_gpa);
-               } else {
-                       vmlaunch();
-                       vmresume();
-               }
-
-               /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
-               sync_with_host(12);
-       }
-
-       sync_with_host(DONE);
-}
-
-void inject_smi(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu_events events;
-
-       vcpu_events_get(vcpu, &events);
-
-       events.smi.pending = 1;
-       events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
-       vcpu_events_set(vcpu, &events);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t nested_gva = 0;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_regs regs;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       int stage, stage_reported;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
-                                   SMRAM_MEMSLOT, SMRAM_PAGES, 0);
-       TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
-                   == SMRAM_GPA, "could not allocate guest physical addresses?");
-
-       memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
-       memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
-              sizeof(smi_handler));
-
-       vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
-
-       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_cpu_has(X86_FEATURE_SVM))
-                       vcpu_alloc_svm(vm, &nested_gva);
-               else if (kvm_cpu_has(X86_FEATURE_VMX))
-                       vcpu_alloc_vmx(vm, &nested_gva);
-       }
-
-       if (!nested_gva)
-               pr_info("will skip SMM test with VMX enabled\n");
-
-       vcpu_args_set(vcpu, 1, nested_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               memset(&regs, 0, sizeof(regs));
-               vcpu_regs_get(vcpu, &regs);
-
-               stage_reported = regs.rax & 0xff;
-
-               if (stage_reported == DONE)
-                       goto done;
-
-               TEST_ASSERT(stage_reported == stage ||
-                           stage_reported == SMRAM_STAGE,
-                           "Unexpected stage: #%x, got %x",
-                           stage, stage_reported);
-
-               /*
-                * Enter SMM during L2 execution and check that we correctly
-                * return from it. Do not perform save/restore while in SMM yet.
-                */
-               if (stage == 8) {
-                       inject_smi(vcpu);
-                       continue;
-               }
-
-               /*
-                * Perform save/restore while the guest is in SMM triggered
-                * during L2 execution.
-                */
-               if (stage == 10)
-                       inject_smi(vcpu);
-
-               state = vcpu_save_state(vcpu);
-               kvm_vm_release(vm);
-
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
deleted file mode 100644 (file)
index 141b7fc..0000000
+++ /dev/null
@@ -1,323 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * KVM_GET/SET_* tests
- *
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for vCPU state save/restore, including nested guest state.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#define L2_GUEST_STACK_SIZE 256
-
-void svm_l2_guest_code(void)
-{
-       GUEST_SYNC(4);
-       /* Exit to L1 */
-       vmcall();
-       GUEST_SYNC(6);
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void svm_l1_guest_code(struct svm_test_data *svm)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       GUEST_ASSERT(svm->vmcb_gpa);
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, svm_l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(3);
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(5);
-       vmcb->save.rip += 3;
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_SYNC(7);
-}
-
-void vmx_l2_guest_code(void)
-{
-       GUEST_SYNC(6);
-
-       /* Exit to L1 */
-       vmcall();
-
-       /* L1 has now set up a shadow VMCS for us.  */
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-       GUEST_SYNC(10);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
-       GUEST_SYNC(11);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
-       GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
-       GUEST_SYNC(12);
-
-       /* Done, exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_SYNC(3);
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       GUEST_SYNC(4);
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, vmx_l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(5);
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       /* Check that the launched state is preserved.  */
-       GUEST_ASSERT(vmlaunch());
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_SYNC(7);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
-
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
-       vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
-       GUEST_ASSERT(vmlaunch());
-       GUEST_SYNC(8);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-
-       vmwrite(GUEST_RIP, 0xc0ffee);
-       GUEST_SYNC(9);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-       GUEST_SYNC(13);
-       GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
-       GUEST_ASSERT(vmlaunch());
-       GUEST_ASSERT(vmresume());
-}
-
-static void __attribute__((__flatten__)) guest_code(void *arg)
-{
-       GUEST_SYNC(1);
-
-       if (this_cpu_has(X86_FEATURE_XSAVE)) {
-               uint64_t supported_xcr0 = this_cpu_supported_xcr0();
-               uint8_t buffer[4096];
-
-               memset(buffer, 0xcc, sizeof(buffer));
-
-               /*
-                * Modify state for all supported xfeatures to take them out of
-                * their "init" state, i.e. to make them show up in XSTATE_BV.
-                *
-                * Note off-by-default features, e.g. AMX, are out of scope for
-                * this particular testcase as they have a different ABI.
-                */
-               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
-               asm volatile ("fincstp");
-
-               GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
-               asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
-
-               if (supported_xcr0 & XFEATURE_MASK_YMM)
-                       asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
-
-               if (supported_xcr0 & XFEATURE_MASK_AVX512) {
-                       asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
-                       asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
-                       asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
-               }
-
-               if (this_cpu_has(X86_FEATURE_MPX)) {
-                       uint64_t bounds[2] = { 10, 0xffffffffull };
-                       uint64_t output[2] = { };
-
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
-
-                       /*
-                        * Don't bother trying to get BNDCSR into the INUSE
-                        * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
-                        * managed via XSAVE/XRSTOR, and BNDCFGU can only be
-                        * modified by XRSTOR.  Stuffing XSTATE_BV in the host
-                        * is simpler than doing XRSTOR here in the guest.
-                        *
-                        * However, temporarily enable MPX in BNDCFGS so that
-                        * BNDMOV actually loads BND1.  If MPX isn't *fully*
-                        * enabled, all MPX instructions are treated as NOPs.
-                        *
-                        * Hand encode "bndmov (%rax),%bnd1" as support for MPX
-                        * mnemonics/registers has been removed from gcc and
-                        * clang (and was never fully supported by clang).
-                        */
-                       wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
-                       asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
-                       /*
-                        * Hand encode "bndmov %bnd1, (%rax)" to sanity check
-                        * that BND1 actually got loaded.
-                        */
-                       asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
-                       wrmsr(MSR_IA32_BNDCFGS, 0);
-
-                       GUEST_ASSERT_EQ(bounds[0], output[0]);
-                       GUEST_ASSERT_EQ(bounds[1], output[1]);
-               }
-               if (this_cpu_has(X86_FEATURE_PKU)) {
-                       GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
-                       set_cr4(get_cr4() | X86_CR4_PKE);
-                       GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
-
-                       wrpkru(-1u);
-               }
-       }
-
-       GUEST_SYNC(2);
-
-       if (arg) {
-               if (this_cpu_has(X86_FEATURE_SVM))
-                       svm_l1_guest_code(arg);
-               else
-                       vmx_l1_guest_code(arg);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       uint64_t *xstate_bv, saved_xstate_bv;
-       vm_vaddr_t nested_gva = 0;
-       struct kvm_cpuid2 empty_cpuid = {};
-       struct kvm_regs regs1, regs2;
-       struct kvm_vcpu *vcpu, *vcpuN;
-       struct kvm_vm *vm;
-       struct kvm_x86_state *state;
-       struct ucall uc;
-       int stage;
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
-               if (kvm_cpu_has(X86_FEATURE_SVM))
-                       vcpu_alloc_svm(vm, &nested_gva);
-               else if (kvm_cpu_has(X86_FEATURE_VMX))
-                       vcpu_alloc_vmx(vm, &nested_gva);
-       }
-
-       if (!nested_gva)
-               pr_info("will skip nested state checks\n");
-
-       vcpu_args_set(vcpu, 1, nested_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-
-               /*
-                * Restore XSAVE state in a dummy vCPU, first without doing
-                * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
-                * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
-                * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
-                * load only XSAVE state; MSRs in particular have a much more
-                * convoluted ABI.
-                *
-                * Load two versions of XSAVE state: one with the actual guest
-                * XSAVE state, and one with all supported features forced "on"
-                * in xstate_bv, e.g. to ensure that KVM allows loading all
-                * supported features, even if something goes awry in saving
-                * the original snapshot.
-                */
-               xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
-               saved_xstate_bv = *xstate_bv;
-
-               vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
-               vcpu_xsave_set(vcpuN, state->xsave);
-               *xstate_bv = kvm_cpu_supported_xcr0();
-               vcpu_xsave_set(vcpuN, state->xsave);
-
-               vcpu_init_cpuid(vcpuN, &empty_cpuid);
-               vcpu_xsave_set(vcpuN, state->xsave);
-               *xstate_bv = saved_xstate_bv;
-               vcpu_xsave_set(vcpuN, state->xsave);
-
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
deleted file mode 100644 (file)
index 916e042..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_int_ctl_test
- *
- * Copyright (C) 2021, Red Hat, Inc.
- *
- * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "apic.h"
-
-bool vintr_irq_called;
-bool intr_irq_called;
-
-#define VINTR_IRQ_NUMBER 0x20
-#define INTR_IRQ_NUMBER 0x30
-
-static void vintr_irq_handler(struct ex_regs *regs)
-{
-       vintr_irq_called = true;
-}
-
-static void intr_irq_handler(struct ex_regs *regs)
-{
-       x2apic_write_reg(APIC_EOI, 0x00);
-       intr_irq_called = true;
-}
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       /* This code raises interrupt INTR_IRQ_NUMBER in L1's LAPIC,
-        * and since L1 didn't enable virtual interrupt masking,
-        * L2 should receive it and not L1.
-        *
-        * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
-        * so it should also receive it after the following 'sti'.
-        */
-       x2apic_write_reg(APIC_ICR,
-               APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
-
-       __asm__ __volatile__(
-               "sti\n"
-               "nop\n"
-       );
-
-       GUEST_ASSERT(vintr_irq_called);
-       GUEST_ASSERT(intr_irq_called);
-
-       __asm__ __volatile__(
-               "vmcall\n"
-       );
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       x2apic_enable();
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* No virtual interrupt masking */
-       vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
-       /* No intercepts for real and virtual interrupts */
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
-
-       /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
-       vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
-       vmcb->control.int_vector = VINTR_IRQ_NUMBER;
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
-       vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-       vcpu_args_set(vcpu, 1, svm_gva);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-               /* NOT REACHED */
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
deleted file mode 100644 (file)
index 00135cb..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_nested_shutdown_test
- *
- * Copyright (C) 2022, Red Hat, Inc.
- *
- * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       __asm__ __volatile__("ud2");
-}
-
-static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
-       idt[6].p   = 0; // #UD is intercepted but its injection will cause #NP
-       idt[11].p  = 0; // #NP is not intercepted and will cause another
-                       // #NP that will be converted to #DF
-       idt[8].p   = 0; // #DF will cause #NP which will cause SHUTDOWN
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       /* should not reach here */
-       GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vcpu_alloc_svm(vm, &svm_gva);
-
-       vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
deleted file mode 100644 (file)
index 7b6481d..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2022 Oracle and/or its affiliates.
- *
- * Based on:
- *   svm_int_ctl_test
- *
- *   Copyright (C) 2021, Red Hat, Inc.
- *
- */
-#include <stdatomic.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-#include "test_util.h"
-
-#define INT_NR                 0x20
-
-static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
-
-static unsigned int bp_fired;
-static void guest_bp_handler(struct ex_regs *regs)
-{
-       bp_fired++;
-}
-
-static unsigned int int_fired;
-static void l2_guest_code_int(void);
-
-static void guest_int_handler(struct ex_regs *regs)
-{
-       int_fired++;
-       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
-}
-
-static void l2_guest_code_int(void)
-{
-       GUEST_ASSERT_EQ(int_fired, 1);
-
-       /*
-        * Same as the vmmcall() function, but with a ud2 sneaked after the
-        * vmmcall.  The caller injects an exception with the return address
-        * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
-        * use vmmcall() directly.
-        */
-       __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
-                             : : "a"(0xdeadbeef), "c"(0xbeefdead)
-                             : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
-                               "r10", "r11", "r12", "r13", "r14", "r15");
-
-       GUEST_ASSERT_EQ(bp_fired, 1);
-       hlt();
-}
-
-static atomic_int nmi_stage;
-#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
-#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
-static void guest_nmi_handler(struct ex_regs *regs)
-{
-       nmi_stage_inc();
-
-       if (nmi_stage_get() == 1) {
-               vmmcall();
-               GUEST_FAIL("Unexpected resume after VMMCALL");
-       } else {
-               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
-               GUEST_DONE();
-       }
-}
-
-static void l2_guest_code_nmi(void)
-{
-       ud2();
-}
-
-static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       if (is_nmi)
-               x2apic_enable();
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm,
-                         is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
-       vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
-
-       if (is_nmi) {
-               vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
-       } else {
-               vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
-               /* The return address pushed on stack */
-               vmcb->control.next_rip = vmcb->save.rip;
-       }
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
-                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
-                      vmcb->control.exit_code,
-                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
-       if (is_nmi) {
-               clgi();
-               x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
-
-               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
-               nmi_stage_inc();
-
-               stgi();
-               /* self-NMI happens here */
-               while (true)
-                       cpu_relax();
-       }
-
-       /* Skip over VMMCALL */
-       vmcb->save.rip += 3;
-
-       /* Switch to alternate IDT to cause intervening NPF again */
-       vmcb->save.idtr.base = idt_alt;
-       vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
-
-       vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
-       /* The return address pushed on stack, skip over UD2 */
-       vmcb->control.next_rip = vmcb->save.rip + 2;
-
-       run_guest(vmcb, svm->vmcb_gpa);
-       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
-                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx', info2 = '0x%lx'",
-                      vmcb->control.exit_code,
-                      vmcb->control.exit_info_1, vmcb->control.exit_info_2);
-
-       GUEST_DONE();
-}
-
-static void run_test(bool is_nmi)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t svm_gva;
-       vm_vaddr_t idt_alt_vm;
-       struct kvm_guest_debug debug;
-
-       pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
-       vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
-       vm_install_exception_handler(vm, INT_NR, guest_int_handler);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-
-       if (!is_nmi) {
-               void *idt, *idt_alt;
-
-               idt_alt_vm = vm_vaddr_alloc_page(vm);
-               idt_alt = addr_gva2hva(vm, idt_alt_vm);
-               idt = addr_gva2hva(vm, vm->arch.idt);
-               memcpy(idt_alt, idt, getpagesize());
-       } else {
-               idt_alt_vm = 0;
-       }
-       vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
-
-       memset(&debug, 0, sizeof(debug));
-       vcpu_guest_debug_set(vcpu, &debug);
-
-       struct ucall uc;
-
-       alarm(2);
-       vcpu_run(vcpu);
-       alarm(0);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-               /* NOT REACHED */
-       case UCALL_DONE:
-               goto done;
-       default:
-               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-       }
-done:
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
-                   "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
-
-       atomic_init(&nmi_stage, 0);
-
-       run_test(false);
-       run_test(true);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
deleted file mode 100644 (file)
index 8a62cca..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * svm_vmcall_test
- *
- * Copyright (C) 2020, Red Hat, Inc.
- *
- * Nested SVM testing: VMCALL
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "svm_util.h"
-
-static void l2_guest_code(struct svm_test_data *svm)
-{
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct svm_test_data *svm)
-{
-       #define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       struct vmcb *vmcb = svm->vmcb;
-
-       /* Prepare for L2 execution. */
-       generic_svm_setup(svm, l2_guest_code,
-                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       vm_vaddr_t svm_gva;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       vcpu_alloc_svm(vm, &svm_gva);
-       vcpu_args_set(vcpu, 1, svm_gva);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
deleted file mode 100644 (file)
index 8fa3948..0000000
+++ /dev/null
@@ -1,411 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Test for x86 KVM_CAP_SYNC_REGS
- *
- * Copyright (C) 2018, Google LLC.
- *
- * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
- * including requesting an invalid register set, and updates to/from values
- * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <pthread.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
-
-struct ucall uc_none = {
-       .cmd = UCALL_NONE,
-};
-
-/*
- * ucall is embedded here to protect against the compiler reshuffling registers
- * before calling a function. In this test we only need to get a KVM_EXIT_IO
- * vmexit and preserve RBX; no additional information is needed.
- */
-void guest_code(void)
-{
-       asm volatile("1: in %[port], %%al\n"
-                    "add $0x1, %%rbx\n"
-                    "jmp 1b"
-                    : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
-                    : "rax", "rbx");
-}
-
-KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
-
-static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
-{
-#define REG_COMPARE(reg) \
-       TEST_ASSERT(left->reg == right->reg, \
-                   "Register " #reg \
-                   " values did not match: 0x%llx, 0x%llx", \
-                   left->reg, right->reg)
-       REG_COMPARE(rax);
-       REG_COMPARE(rbx);
-       REG_COMPARE(rcx);
-       REG_COMPARE(rdx);
-       REG_COMPARE(rsi);
-       REG_COMPARE(rdi);
-       REG_COMPARE(rsp);
-       REG_COMPARE(rbp);
-       REG_COMPARE(r8);
-       REG_COMPARE(r9);
-       REG_COMPARE(r10);
-       REG_COMPARE(r11);
-       REG_COMPARE(r12);
-       REG_COMPARE(r13);
-       REG_COMPARE(r14);
-       REG_COMPARE(r15);
-       REG_COMPARE(rip);
-       REG_COMPARE(rflags);
-#undef REG_COMPARE
-}
-
-static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
-{
-}
-
-static void compare_vcpu_events(struct kvm_vcpu_events *left,
-                               struct kvm_vcpu_events *right)
-{
-}
-
-#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
-#define INVALID_SYNC_FIELD 0x80000000
-
-/*
- * Set an exception as pending *and* injected while KVM is processing events.
- * KVM is supposed to ignore/drop pending exceptions if userspace is also
- * requesting that an exception be injected.
- */
-static void *race_events_inj_pen(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       struct kvm_vcpu_events *events = &run->s.regs.events;
-
-       WRITE_ONCE(events->exception.nr, UD_VECTOR);
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
-               WRITE_ONCE(events->flags, 0);
-               WRITE_ONCE(events->exception.injected, 1);
-               WRITE_ONCE(events->exception.pending, 1);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-/*
- * Set an invalid exception vector while KVM is processing events.  KVM is
- * supposed to reject any vector >= 32, as well as NMIs (vector 2).
- */
-static void *race_events_exc(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       struct kvm_vcpu_events *events = &run->s.regs.events;
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
-               WRITE_ONCE(events->flags, 0);
-               WRITE_ONCE(events->exception.nr, UD_VECTOR);
-               WRITE_ONCE(events->exception.pending, 1);
-               WRITE_ONCE(events->exception.nr, 255);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-/*
- * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
- * illegal, and KVM's MMU heavily relies on vCPU state being valid.
- */
-static noinline void *race_sregs_cr4(void *arg)
-{
-       struct kvm_run *run = (struct kvm_run *)arg;
-       __u64 *cr4 = &run->s.regs.sregs.cr4;
-       __u64 pae_enabled = *cr4;
-       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
-
-       for (;;) {
-               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
-               WRITE_ONCE(*cr4, pae_enabled);
-               asm volatile(".rept 512\n\t"
-                            "nop\n\t"
-                            ".endr");
-               WRITE_ONCE(*cr4, pae_disabled);
-
-               pthread_testcancel();
-       }
-
-       return NULL;
-}
-
-static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
-{
-       const time_t TIMEOUT = 2; /* seconds, roughly */
-       struct kvm_x86_state *state;
-       struct kvm_translation tr;
-       struct kvm_run *run;
-       pthread_t thread;
-       time_t t;
-
-       run = vcpu->run;
-
-       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
-       vcpu_run(vcpu);
-       run->kvm_valid_regs = 0;
-
-       /* Save state *before* spawning the thread that mucks with vCPU state. */
-       state = vcpu_save_state(vcpu);
-
-       /*
-        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
-        * should already be set in guest state.
-        */
-       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
-                   (run->s.regs.sregs.efer & EFER_LME),
-                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
-                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
-                   !!(run->s.regs.sregs.efer & EFER_LME));
-
-       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
-
-       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
-               /*
-                * Reload known good state if the vCPU triple faults, e.g. due
-                * to the unhandled #GPs being injected.  VMX preserves state
-                * on shutdown, but SVM synthesizes an INIT as the VMCB state
-                * is architecturally undefined on triple fault.
-                */
-               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
-                       vcpu_load_state(vcpu, state);
-
-               if (racer == race_sregs_cr4) {
-                       tr = (struct kvm_translation) { .linear_address = 0 };
-                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
-               }
-       }
-
-       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
-       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
-
-       kvm_x86_state_cleanup(state);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request reading invalid register set from VCPU. */
-       run->kvm_valid_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-
-       run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_valid_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       int rv;
-
-       /* Request setting invalid register set into VCPU. */
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-
-       run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
-       TEST_ASSERT(rv < 0 && errno == EINVAL,
-                   "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
-                   rv);
-       run->kvm_dirty_regs = 0;
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vcpu_events events;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-
-       /* Request and verify all valid register sets. */
-       /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs.sregs);
-
-       vcpu_events_get(vcpu, &events);
-       compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_vcpu_events events;
-       struct kvm_sregs sregs;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Set and verify various register values. */
-       run->s.regs.regs.rbx = 0xBAD1DEA;
-       run->s.regs.sregs.apic_base = 1 << 11;
-       /* TODO run->s.regs.events.XYZ = ABC; */
-
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
-                   "apic_base sync regs value incorrect 0x%llx.",
-                   run->s.regs.sregs.apic_base);
-
-       vcpu_regs_get(vcpu, &regs);
-       compare_regs(&regs, &run->s.regs.regs);
-
-       vcpu_sregs_get(vcpu, &sregs);
-       compare_sregs(&sregs, &run->s.regs.sregs);
-
-       vcpu_events_get(vcpu, &events);
-       compare_vcpu_events(&events, &run->s.regs.events);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-
-       /* Clear kvm_dirty_regs bits, verify new s.regs values are
-        * overwritten with existing guest values.
-        */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.regs.rbx = 0xDEADBEEF;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
-        * Verify s.regs values are not overwritten with existing guest values
-        * and that guest values are not overwritten with kvm_sync_regs values.
-        */
-       run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = 0;
-       run->s.regs.regs.rbx = 0xAAAA;
-       vcpu_regs_get(vcpu, &regs);
-       regs.rbx = 0xBAC0;
-       vcpu_regs_set(vcpu, &regs);
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
-                   "rbx guest value incorrect 0x%llx.",
-                   regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
-{
-       struct kvm_run *run = vcpu->run;
-       struct kvm_regs regs;
-
-       /* Run once to get register set */
-       run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
-        * with existing guest values but that guest values are overwritten
-        * with kvm_sync_regs values.
-        */
-       run->kvm_valid_regs = 0;
-       run->kvm_dirty_regs = TEST_SYNC_FIELDS;
-       run->s.regs.regs.rbx = 0xBBBB;
-       vcpu_run(vcpu);
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
-                   "rbx sync regs value incorrect 0x%llx.",
-                   run->s.regs.regs.rbx);
-       vcpu_regs_get(vcpu, &regs);
-       TEST_ASSERT(regs.rbx == 0xBBBB + 1,
-                   "rbx guest value incorrect 0x%llx.",
-                   regs.rbx);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
-{
-       race_sync_regs(vcpu, race_sregs_cr4);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
-{
-       race_sync_regs(vcpu, race_events_exc);
-}
-
-KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
-{
-       race_sync_regs(vcpu, race_events_inj_pen);
-}
-
-int main(int argc, char *argv[])
-{
-       int cap;
-
-       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
-       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
deleted file mode 100644 (file)
index 56306a1..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-#include "svm_util.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT      0x2000
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-#define L2_GUEST_STACK_SIZE 64
-unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-void l1_guest_code_vmx(struct vmx_pages *vmx)
-{
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-
-       prepare_vmcs(vmx, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_ASSERT(!vmlaunch());
-       /* L2 should triple fault after a triple fault event is injected. */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
-       GUEST_DONE();
-}
-
-void l1_guest_code_svm(struct svm_test_data *svm)
-{
-       struct vmcb *vmcb = svm->vmcb;
-
-       generic_svm_setup(svm, l2_guest_code,
-                       &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* Don't intercept shutdown, to test the case where SVM allows L1 to leave it unintercepted. */
-       vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
-
-       run_guest(vmcb, svm->vmcb_gpa);
-
-       /* Should not reach here; L1 should crash. */
-       GUEST_ASSERT(0);
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vcpu_events events;
-       struct ucall uc;
-
-       bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
-       bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
-
-       TEST_REQUIRE(has_vmx || has_svm);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
-
-
-       if (has_vmx) {
-               vm_vaddr_t vmx_pages_gva;
-
-               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
-               vcpu_alloc_vmx(vm, &vmx_pages_gva);
-               vcpu_args_set(vcpu, 1, vmx_pages_gva);
-       } else {
-               vm_vaddr_t svm_gva;
-
-               vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
-               vcpu_alloc_svm(vm, &svm_gva);
-               vcpu_args_set(vcpu, 1, svm_gva);
-       }
-
-       vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
-       run = vcpu->run;
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
-                   "Expected IN from port %d from L2, got port %d",
-                   ARBITRARY_IO_PORT, run->io.port);
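-
-       /*
-        * Queue a pending triple fault event from userspace, then complete
-        * the in-flight port I/O before re-entering the guest.
-        */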
-       vcpu_events_get(vcpu, &events);
-       events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
-       events.triple_fault.pending = true;
-       vcpu_events_set(vcpu, &events);
-       run->immediate_exit = true;
-       vcpu_run_complete_io(vcpu);
-
-       vcpu_events_get(vcpu, &events);
-       TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
-                   "Triple fault event invalid");
-       TEST_ASSERT(events.triple_fault.pending,
-                   "No triple fault pending");
-       vcpu_run(vcpu);
-
-
-       if (has_svm) {
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
-       } else {
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       break;
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               default:
-                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-               }
-       }
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
deleted file mode 100644 (file)
index 12b0964..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <string.h>
-#include "kvm_util.h"
-#include "processor.h"
-
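-/*
- * All TSC values are rounded to the nearest multiple of UNITY (2^30 cycles)
- * so that the time that elapses between writing and reading the TSC doesn't
- * cause spurious mismatches.
- */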
-#define UNITY                  (1ull << 30)
-#define HOST_ADJUST            (UNITY * 64)
-#define GUEST_STEP             (UNITY * 4)
-#define ROUND(x)               ((x + UNITY / 2) & -UNITY)
-#define rounded_rdmsr(x)       ROUND(rdmsr(x))
-#define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vcpu, x))
-
-static void guest_code(void)
-{
-       u64 val = 0;
-
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
-       val = 1ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
-       GUEST_SYNC(2);
-       val = 2ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC_ADJUST, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Host: setting the TSC offset.  */
-       GUEST_SYNC(3);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
-        * host-side offset and affect both MSRs.
-        */
-       GUEST_SYNC(4);
-       val = 3ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC_ADJUST, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
-        * offset is now visible in MSR_IA32_TSC_ADJUST.
-        */
-       GUEST_SYNC(5);
-       val = 4ull * GUEST_STEP;
-       wrmsr(MSR_IA32_TSC, val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
-       GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
-       GUEST_DONE();
-}
-
-static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               if (!strcmp((const char *)uc.args[0], "hello") &&
-                   uc.args[1] == stage + 1)
-                       ksft_test_result_pass("stage %d passed\n", stage + 1);
-               else
-                       ksft_test_result_fail(
-                               "stage %d: Unexpected register values vmexit, got %lx",
-                               stage + 1, (ulong)uc.args[1]);
-               return;
-       case UCALL_DONE:
-               ksft_test_result_pass("stage %d passed\n", stage + 1);
-               return;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_ASSERT(false, "Unexpected exit: %s",
-                           exit_reason_str(vcpu->run->exit_reason));
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       uint64_t val;
-
-       ksft_print_header();
-       ksft_set_plan(5);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       val = 0;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
-       run_vcpu(vcpu, 1);
-       val = 1ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
-       run_vcpu(vcpu, 2);
-       val = 2ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Host: writes to MSR_IA32_TSC set the host-side offset
-        * and therefore do not change MSR_IA32_TSC_ADJUST.
-        */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-       run_vcpu(vcpu, 3);
-
-       /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
-
-       /* Restore previous value.  */
-       vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
-        * host-side offset and affect both MSRs.
-        */
-       run_vcpu(vcpu, 4);
-       val = 3ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
-
-       /*
-        * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
-        * offset is now visible in MSR_IA32_TSC_ADJUST.
-        */
-       run_vcpu(vcpu, 5);
-       val = 4ull * GUEST_STEP;
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
-
-       kvm_vm_free(vm);
-
-       ksft_finished();        /* Print results and exit() accordingly */
-}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
deleted file mode 100644 (file)
index 59c7304..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#define NR_TEST_VCPUS 20
-
-static struct kvm_vm *vm;
-pthread_spinlock_t create_lock;
-
-#define TEST_TSC_KHZ    2345678UL
-#define TEST_TSC_OFFSET 200000000
-
-uint64_t tsc_sync;
-static void guest_code(void)
-{
-       uint64_t start_tsc, local_tsc, tmp;
-
-       start_tsc = rdtsc();
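-
-       /*
-        * Ping-pong the most recently observed TSC value through the shared
-        * tsc_sync variable; if this vCPU's TSC reads older than the value
-        * another vCPU just published, the TSCs are out of sync.
-        */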
-       do {
-               tmp = READ_ONCE(tsc_sync);
-               local_tsc = rdtsc();
-               WRITE_ONCE(tsc_sync, local_tsc);
-               if (unlikely(local_tsc < tmp))
-                       GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
-
-       } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
-
-       GUEST_DONE();
-}
-
-
-static void *run_vcpu(void *_cpu_nr)
-{
-       unsigned long vcpu_id = (unsigned long)_cpu_nr;
-       unsigned long failures = 0;
-       static bool first_cpu_done;
-       struct kvm_vcpu *vcpu;
-
-       /* The kernel is fine, but vm_vcpu_add() needs locking */
-       pthread_spin_lock(&create_lock);
-
-       vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
-
-       if (!first_cpu_done) {
-               first_cpu_done = true;
-               vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
-       }
-
-       pthread_spin_unlock(&create_lock);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_DONE:
-                       goto out;
-
-               case UCALL_SYNC:
-                       printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
-                              uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
-                       failures++;
-                       break;
-
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
- out:
-       return (void *)failures;
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
-
-       vm = vm_create(NR_TEST_VCPUS);
-       vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
-
-       pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
-       pthread_t cpu_threads[NR_TEST_VCPUS];
-       unsigned long cpu;
-       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
-               pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
-
-       unsigned long failures = 0;
-       for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
-               void *this_cpu_failures;
-               pthread_join(cpu_threads[cpu], &this_cpu_failures);
-               failures += (unsigned long)this_cpu_failures;
-       }
-
-       TEST_ASSERT(!failures, "TSC sync failed");
-       pthread_spin_destroy(&create_lock);
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
deleted file mode 100644 (file)
index 57f157c..0000000
+++ /dev/null
@@ -1,295 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ucna_injection_test
- *
- * Copyright (C) 2022, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that user space can inject UnCorrectable No Action required (UCNA)
- * memory errors to the guest.
- *
- * The test starts one vCPU with MCG_CMCI_P enabled. It verifies that
- * proper UCNA errors can be injected into a vCPU with MCG_CMCI_P and the
- * corresponding per-bank control register (MCI_CTL2) bit enabled.
- * The test also checks that the UCNA errors get recorded in the
- * Machine Check bank registers regardless of whether the error signal
- * interrupts are delivered to the guest.
- *
- */
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "mce.h"
-#include "processor.h"
-#include "test_util.h"
-#include "apic.h"
-
-#define SYNC_FIRST_UCNA 9
-#define SYNC_SECOND_UCNA 10
-#define SYNC_GP 11
-#define FIRST_UCNA_ADDR 0xdeadbeef
-#define SECOND_UCNA_ADDR 0xcafeb0ba
-
-/*
- * Vector for the CMCI interrupt.
- * Value is arbitrary. Any value in 0x20-0xFF should work:
- * https://wiki.osdev.org/Interrupt_Vector_Table
- */
-#define CMCI_VECTOR  0xa9
-
-#define UCNA_BANK  0x7 // IMC0 bank
-
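-/* Writing this reserved MCI_CTL2 bit from the guest is expected to #GP. */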
-#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
-
-static uint64_t supported_mcg_caps;
-
-/*
- * Record state about the injected UCNAs.
- * Variables starting with the 'i_' prefix are recorded in the interrupt
- * handler. Variables without the prefix are recorded in the guest's main
- * execution thread.
- */
-static volatile uint64_t i_ucna_rcvd;
-static volatile uint64_t i_ucna_addr;
-static volatile uint64_t ucna_addr;
-static volatile uint64_t ucna_addr2;
-
-struct thread_params {
-       struct kvm_vcpu *vcpu;
-       uint64_t *p_i_ucna_rcvd;
-       uint64_t *p_i_ucna_addr;
-       uint64_t *p_ucna_addr;
-       uint64_t *p_ucna_addr2;
-};
-
-static void verify_apic_base_addr(void)
-{
-       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-       uint64_t base = GET_APIC_BASE(msr);
-
-       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void ucna_injection_guest_code(void)
-{
-       uint64_t ctl2;
-       verify_apic_base_addr();
-       xapic_enable();
-
-       /* Sets up the interrupt vector and enables per-bank CMCI signaling. */
-       xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
-       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
-       /* Enables interrupts in the guest. */
-       asm volatile("sti");
-
-       /* Let user space inject the first UCNA */
-       GUEST_SYNC(SYNC_FIRST_UCNA);
-
-       ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-
-       /* Disables the per-bank CMCI signaling. */
-       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
-
-       /* Let the user space inject the second UCNA */
-       GUEST_SYNC(SYNC_SECOND_UCNA);
-
-       ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-       GUEST_DONE();
-}
-
-static void cmci_disabled_guest_code(void)
-{
-       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
-
-       GUEST_DONE();
-}
-
-static void cmci_enabled_guest_code(void)
-{
-       uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
-       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
-
-       GUEST_DONE();
-}
-
-static void guest_cmci_handler(struct ex_regs *regs)
-{
-       i_ucna_rcvd++;
-       i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
-       xapic_write_reg(APIC_EOI, 0);
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       GUEST_SYNC(SYNC_GP);
-}
-
-static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
-       printf("vCPU received GP in guest.\n");
-}
-
-static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr)
-{
-       /*
-        * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
-        * the IA32_MCi_STATUS register.
-        * MSCOD=1 (BIT[16] - MscodDataRdErr).
-        * MCACOD=0x0090 (Memory controller error format, channel 0)
-        */
-       uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
-                         MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
-       struct kvm_x86_mce mce = {};
-       mce.status = status;
-       mce.mcg_status = 0;
-       /*
-        * MCM_ADDR_PHYS indicates the reported address is a physical address.
-        * The lowest 6 bits are the recoverable address LSB, i.e., the
-        * injected MCE is at 4KB granularity.
-        */
-       mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
-       mce.addr = addr;
-       mce.bank = UCNA_BANK;
-
-       vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
-}
-
-static void *run_ucna_injection(void *arg)
-{
-       struct thread_params *params = (struct thread_params *)arg;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(r == 0,
-                   "pthread_setcanceltype failed with errno=%d",
-                   r);
-
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
-
-       printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
-
-       inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
-                   "Expect UCALL_SYNC");
-       TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
-
-       printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
-
-       inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
-       vcpu_run(params->vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
-       if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
-               TEST_ASSERT(false, "vCPU assertion failure: %s.",
-                           (const char *)uc.args[0]);
-       }
-
-       return NULL;
-}
-
-static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       params->vcpu = vcpu;
-       params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
-       params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
-       params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
-       params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
-
-       run_ucna_injection(params);
-
-       TEST_ASSERT(*params->p_i_ucna_rcvd == 1,
-                   "Only the first UCNA should be signaled.");
-       TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
-                   "Only the first UCNA's address should be recorded via the interrupt.");
-       TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
-                   "The first injected UCNA should be exposed via the registers.");
-       TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
-                   "The second injected UCNA should be exposed via the registers.");
-
-       printf("Test successful.\n"
-              "UCNA CMCI interrupts received: %ld\n"
-              "Last UCNA address received via CMCI: %lx\n"
-              "First UCNA address in vCPU thread: %lx\n"
-              "Second UCNA address in vCPU thread: %lx\n",
-              *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
-              *params->p_ucna_addr, *params->p_ucna_addr2);
-}
-
-static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
-{
-       uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
-       if (enable_cmci_p)
-               mcg_caps |= MCG_CMCI_P;
-
-       mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
-       vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
-}
-
-static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
-                                                bool enable_cmci_p, void *guest_code)
-{
-       struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
-       setup_mce_cap(vcpu, enable_cmci_p);
-       return vcpu;
-}
-
-int main(int argc, char *argv[])
-{
-       struct thread_params params;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *ucna_vcpu;
-       struct kvm_vcpu *cmcidis_vcpu;
-       struct kvm_vcpu *cmci_vcpu;
-
-       kvm_check_cap(KVM_CAP_MCE);
-
-       vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
-
-       kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
-                 &supported_mcg_caps);
-
-       if (!(supported_mcg_caps & MCG_CMCI_P)) {
-               print_skip("MCG_CMCI_P is not supported");
-               exit(KSFT_SKIP);
-       }
-
-       ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
-       cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
-       cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
-
-       vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
-       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       test_ucna_injection(ucna_vcpu, &params);
-       run_vcpu_expect_gp(cmcidis_vcpu);
-       run_vcpu_expect_gp(cmci_vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
deleted file mode 100644 (file)
index 9481cbc..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-static void guest_ins_port80(uint8_t *buffer, unsigned int count)
-{
-       unsigned long end;
-
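-       /*
-        * main() rewrites RCX 2 => 1 and 3 => 8192 at the userspace I/O exit,
-        * so expect the destination pointer to advance by 1 or 8192 bytes,
-        * respectively.
-        */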
-       if (count == 2)
-               end = (unsigned long)buffer + 1;
-       else
-               end = (unsigned long)buffer + 8192;
-
-       asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
-       GUEST_ASSERT_EQ(count, 0);
-       GUEST_ASSERT_EQ((unsigned long)buffer, end);
-}
-
-static void guest_code(void)
-{
-       uint8_t buffer[8192];
-       int i;
-
-       /*
-        * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
-        * test that KVM doesn't explode when userspace modifies the "count" on
-        * a userspace I/O exit.  KVM isn't required to play nice with the I/O
-        * itself as KVM doesn't support manipulating the count; it just needs
-        * to not explode or overflow a buffer.
-        */
-       guest_ins_port80(buffer, 2);
-       guest_ins_port80(buffer, 3);
-
-       /* Verify KVM fills the buffer correctly when not stuffing RCX. */
-       memset(buffer, 0, sizeof(buffer));
-       guest_ins_port80(buffer, 8192);
-       for (i = 0; i < 8192; i++)
-               __GUEST_ASSERT(buffer[i] == 0xaa,
-                              "Expected '0xaa', got '0x%x' at buffer[%u]",
-                              buffer[i], i);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_regs regs;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       memset(&regs, 0, sizeof(regs));
-
-       while (1) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (get_ucall(vcpu, &uc))
-                       break;
-
-               TEST_ASSERT(run->io.port == 0x80,
-                           "Expected I/O at port 0x80, got port 0x%x", run->io.port);
-
-               /*
-                * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
-                * Note, this abuses KVM's batching of rep string I/O to avoid
-                * getting stuck in an infinite loop.  That behavior isn't in
-                * scope from a testing perspective as it's not ABI in any way,
-                * i.e. it really is abusing internal KVM knowledge.
-                */
-               vcpu_regs_get(vcpu, &regs);
-               if (regs.rcx == 2)
-                       regs.rcx = 1;
-               if (regs.rcx == 3)
-                       regs.rcx = 8192;
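-               /* Fill KVM's I/O buffer with 0xaa so the guest can verify the data. */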
-               memset((void *)run + run->io.data_offset, 0xaa, 4096);
-               vcpu_regs_set(vcpu, &regs);
-       }
-
-       switch (uc.cmd) {
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
deleted file mode 100644 (file)
index 32b2794..0000000
+++ /dev/null
@@ -1,769 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020, Google LLC.
- *
- * Tests for exiting into userspace on registered MSRs
- */
-#include <sys/ioctl.h>
-
-#include "kvm_test_harness.h"
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_NON_EXISTENT 0x474f4f00
-
-static u64 deny_bits = 0;
-struct kvm_msr_filter filter_allow = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test an MSR the kernel knows about. */
-                       .base = MSR_IA32_XSS,
-                       .bitmap = (uint8_t*)&deny_bits,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test an MSR the kernel doesn't know about. */
-                       .base = MSR_IA32_FLUSH_CMD,
-                       .bitmap = (uint8_t*)&deny_bits,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ |
-                                KVM_MSR_FILTER_WRITE,
-                       .nmsrs = 1,
-                       /* Test a fabricated MSR that no one knows about. */
-                       .base = MSR_NON_EXISTENT,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-struct kvm_msr_filter filter_fs = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .nmsrs = 1,
-                       .base = MSR_FS_BASE,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-struct kvm_msr_filter filter_gs = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .nmsrs = 1,
-                       .base = MSR_GS_BASE,
-                       .bitmap = (uint8_t*)&deny_bits,
-               },
-       },
-};
-
-static uint64_t msr_non_existent_data;
-static int guest_exception_count;
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
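-/* Clear the MSR's bit in a filter bitmap; a clear bit denies the access. */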
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
-       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
-       bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
-
-static void prepare_bitmaps(void)
-{
-       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
-       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
-       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
-       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
-       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
-       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
-       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
-       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter_deny = {
-       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000_write,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
-                       .base = 0x40000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_40000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000_read,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
-                       .base = 0xdeadbeef,
-                       .nmsrs = 1,
-                       .bitmap = bitmap_deadbeef,
-               },
-       },
-};
-
-struct kvm_msr_filter no_filter_deny = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-/*
- * Note: Force test_rdmsr() to not be inlined to prevent the labels,
- * rdmsr_start and rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
-                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_wrmsr() to not be inlined to prevent the labels,
- * wrmsr_start and wrmsr_end, from being defined multiple times.
- */
-static noinline void test_wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
-                       "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char rdmsr_start, rdmsr_end;
-extern char wrmsr_start, wrmsr_end;
-
-/*
- * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
- * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
- */
-static noinline uint64_t test_em_rdmsr(uint32_t msr)
-{
-       uint32_t a, d;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
-                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
-
-       return a | ((uint64_t) d << 32);
-}
-
-/*
- * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
- * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
- */
-static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
-{
-       uint32_t a = value;
-       uint32_t d = value >> 32;
-
-       guest_exception_count = 0;
-
-       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
-                       "a"(a), "d"(d), "c"(msr) : "memory");
-}
-
-extern char em_rdmsr_start, em_rdmsr_end;
-extern char em_wrmsr_start, em_wrmsr_end;
-
-static void guest_code_filter_allow(void)
-{
-       uint64_t data;
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
-        *
-        * A GP is thrown if anything other than 0 is written to
-        * MSR_IA32_XSS.
-        */
-       data = test_rdmsr(MSR_IA32_XSS);
-       GUEST_ASSERT(data == 0);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       test_wrmsr(MSR_IA32_XSS, 0);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       test_wrmsr(MSR_IA32_XSS, 1);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
-        *
-        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
-        * from or if a value other than 1 is written to it.
-        */
-       test_rdmsr(MSR_IA32_FLUSH_CMD);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
-       GUEST_ASSERT(guest_exception_count == 1);
-
-       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       /*
-        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
-        *
-        * Test that a fabricated MSR can pass through the kernel
-        * and be handled in userspace.
-        */
-       test_wrmsr(MSR_NON_EXISTENT, 2);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       data = test_rdmsr(MSR_NON_EXISTENT);
-       GUEST_ASSERT(data == 2);
-       GUEST_ASSERT(guest_exception_count == 0);
-
-       if (is_forced_emulation_enabled) {
-               /* Let userspace know we aren't done. */
-               GUEST_SYNC(0);
-
-               /*
-                * Now run the same tests with the instruction emulator.
-                */
-               data = test_em_rdmsr(MSR_IA32_XSS);
-               GUEST_ASSERT(data == 0);
-               GUEST_ASSERT(guest_exception_count == 0);
-               test_em_wrmsr(MSR_IA32_XSS, 0);
-               GUEST_ASSERT(guest_exception_count == 0);
-               test_em_wrmsr(MSR_IA32_XSS, 1);
-               GUEST_ASSERT(guest_exception_count == 1);
-
-               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
-               GUEST_ASSERT(guest_exception_count == 1);
-               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
-               GUEST_ASSERT(guest_exception_count == 1);
-               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
-               GUEST_ASSERT(guest_exception_count == 0);
-
-               test_em_wrmsr(MSR_NON_EXISTENT, 2);
-               GUEST_ASSERT(guest_exception_count == 0);
-               data = test_em_rdmsr(MSR_NON_EXISTENT);
-               GUEST_ASSERT(data == 2);
-               GUEST_ASSERT(guest_exception_count == 0);
-       }
-
-       GUEST_DONE();
-}
-
-static void guest_msr_calls(bool trapped)
-{
-       /* This goes into the in-kernel emulation */
-       wrmsr(MSR_SYSCALL_MASK, 0);
-
-       if (trapped) {
-               /* This goes into user space emulation */
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
-       } else {
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
-       }
-
-       /* If trapped == true, this goes into user space emulation */
-       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
-       /* This goes into the in-kernel emulation */
-       rdmsr(MSR_IA32_POWER_CTL);
-
-       /* Invalid MSR, should always be handled by user space exit */
-       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
-       wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code_filter_deny(void)
-{
-       guest_msr_calls(true);
-
-       /*
-        * Disable MSR filtering so that the kernel handles everything in
-        * the next round.
-        */
-       GUEST_SYNC(0);
-
-       guest_msr_calls(false);
-
-       GUEST_DONE();
-}
-
-static void guest_code_permission_bitmap(void)
-{
-       uint64_t data;
-
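-       /*
-        * Userspace returns the MSR index as the read data for filtered
-        * accesses, so reading back the index proves the access bounced to
-        * userspace.
-        */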
-       data = test_rdmsr(MSR_FS_BASE);
-       GUEST_ASSERT(data == MSR_FS_BASE);
-       data = test_rdmsr(MSR_GS_BASE);
-       GUEST_ASSERT(data != MSR_GS_BASE);
-
-       /* Let userspace know to switch the filter */
-       GUEST_SYNC(0);
-
-       data = test_rdmsr(MSR_FS_BASE);
-       GUEST_ASSERT(data != MSR_FS_BASE);
-       data = test_rdmsr(MSR_GS_BASE);
-       GUEST_ASSERT(data == MSR_GS_BASE);
-
-       GUEST_DONE();
-}
-
-static void __guest_gp_handler(struct ex_regs *regs,
-                              char *r_start, char *r_end,
-                              char *w_start, char *w_end)
-{
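-       /*
-        * Skip the faulting RDMSR/WRMSR by advancing RIP to the matching end
-        * label; for RDMSR, also zero RDX:RAX so the guest reads back 0.
-        */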
-       if (regs->rip == (uintptr_t)r_start) {
-               regs->rip = (uintptr_t)r_end;
-               regs->rax = 0;
-               regs->rdx = 0;
-       } else if (regs->rip == (uintptr_t)w_start) {
-               regs->rip = (uintptr_t)w_end;
-       } else {
-               GUEST_ASSERT(!"RIP is at an unknown location!");
-       }
-
-       ++guest_exception_count;
-}
-
-static void guest_gp_handler(struct ex_regs *regs)
-{
-       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
-                          &wrmsr_start, &wrmsr_end);
-}
-
-static void guest_fep_gp_handler(struct ex_regs *regs)
-{
-       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
-                          &em_wrmsr_start, &em_wrmsr_end);
-}
-
-static void check_for_guest_assert(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       if (vcpu->run->exit_reason == KVM_EXIT_IO &&
-           get_ucall(vcpu, &uc) == UCALL_ABORT) {
-               REPORT_GUEST_ASSERT(uc);
-       }
-}
-
-static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       struct kvm_run *run = vcpu->run;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
-       TEST_ASSERT(run->msr.index == msr_index,
-                       "Unexpected msr (0x%04x), expected 0x%04x",
-                       run->msr.index, msr_index);
-
-       switch (run->msr.index) {
-       case MSR_IA32_XSS:
-               run->msr.data = 0;
-               break;
-       case MSR_IA32_FLUSH_CMD:
-               run->msr.error = 1;
-               break;
-       case MSR_NON_EXISTENT:
-               run->msr.data = msr_non_existent_data;
-               break;
-       case MSR_FS_BASE:
-               run->msr.data = MSR_FS_BASE;
-               break;
-       case MSR_GS_BASE:
-               run->msr.data = MSR_GS_BASE;
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
-       }
-}
-
-static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       struct kvm_run *run = vcpu->run;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
-       TEST_ASSERT(run->msr.index == msr_index,
-                       "Unexpected msr (0x%04x), expected 0x%04x",
-                       run->msr.index, msr_index);
-
-       switch (run->msr.index) {
-       case MSR_IA32_XSS:
-               if (run->msr.data != 0)
-                       run->msr.error = 1;
-               break;
-       case MSR_IA32_FLUSH_CMD:
-               if (run->msr.data != 1)
-                       run->msr.error = 1;
-               break;
-       case MSR_NON_EXISTENT:
-               msr_non_existent_data = run->msr.data;
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
-       }
-}
-
-static void process_ucall_done(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
-                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
-                   uc.cmd, UCALL_DONE);
-}
-
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc = {};
-
-       check_for_guest_assert(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-               break;
-       case UCALL_ABORT:
-               check_for_guest_assert(vcpu);
-               break;
-       case UCALL_DONE:
-               process_ucall_done(vcpu);
-               break;
-       default:
-               TEST_ASSERT(false, "Unexpected ucall");
-       }
-
-       return uc.cmd;
-}
-
-static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
-                                        uint32_t msr_index)
-{
-       vcpu_run(vcpu);
-       process_rdmsr(vcpu, msr_index);
-}
-
-static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
-                                        uint32_t msr_index)
-{
-       vcpu_run(vcpu);
-       process_wrmsr(vcpu, msr_index);
-}
-
-static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
-{
-       vcpu_run(vcpu);
-       return process_ucall(vcpu);
-}
-
-static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
-{
-       vcpu_run(vcpu);
-       process_ucall_done(vcpu);
-}
-
-KVM_ONE_VCPU_TEST_SUITE(user_msr);
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       uint64_t cmd;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
-
-       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
-       /* Process guest code userspace exits. */
-       run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
-       run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-       run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
-       run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
-       run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
-       vcpu_run(vcpu);
-       cmd = process_ucall(vcpu);
-
-       if (is_forced_emulation_enabled) {
-               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
-               vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
-
-               /* Process emulated rdmsr and wrmsr instructions. */
-               run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
-
-               run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-               run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
-
-               run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
-               run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
-
-               /* Confirm the guest completed without issues. */
-               run_guest_then_process_ucall_done(vcpu);
-       } else {
-               TEST_ASSERT_EQ(cmd, UCALL_DONE);
-               printf("To run the emulated instruction tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
-       }
-}
-
-static int handle_ucall(struct kvm_vcpu *vcpu)
-{
-       struct ucall uc;
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_SYNC:
-               vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
-               break;
-       case UCALL_DONE:
-               return 1;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
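-       /*
-        * Reflect the MSR index back as the read data so the guest can tell
-        * that the access was handled in userspace.
-        */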
-       run->msr.data = run->msr.index;
-       msr_reads++;
-
-       if (run->msr.index == MSR_SYSCALL_MASK ||
-           run->msr.index == MSR_GS_BASE) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR read trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "MSR deadbeef read trap w/o inval fault");
-       }
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
-       /* ignore */
-       msr_writes++;
-
-       if (run->msr.index == MSR_IA32_POWER_CTL) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for MSR_IA32_POWER_CTL incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR_IA32_POWER_CTL trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for deadbeef incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "deadbeef trap w/o inval fault");
-       }
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       struct kvm_run *run = vcpu->run;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
-                                                     KVM_MSR_EXIT_REASON_UNKNOWN |
-                                                     KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       prepare_bitmaps();
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               switch (run->exit_reason) {
-               case KVM_EXIT_X86_RDMSR:
-                       handle_rdmsr(run);
-                       break;
-               case KVM_EXIT_X86_WRMSR:
-                       handle_wrmsr(run);
-                       break;
-               case KVM_EXIT_IO:
-                       if (handle_ucall(vcpu))
-                               goto done;
-                       break;
-               }
-
-       }
-
-done:
-       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
-       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-}
-
-KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
-{
-       struct kvm_vm *vm = vcpu->vm;
-       int rc;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
-       run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
-       TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
-                   "Expected ucall state to be UCALL_SYNC.");
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
-       run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
-       run_guest_then_process_ucall_done(vcpu);
-}
-
-#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
-({                                                                     \
-       int r = __vm_ioctl(vm, cmd, arg);                               \
-                                                                       \
-       if (flag & valid_mask)                                          \
-               TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r));            \
-       else                                                            \
-               TEST_ASSERT(r == -1 && errno == EINVAL,                 \
-                           "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
-                           #cmd, flag, r, errno,  strerror(errno));    \
-})
-
-static void run_user_space_msr_flag_test(struct kvm_vm *vm)
-{
-       struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
-       int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
-       int rc;
-       int i;
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-
-       for (i = 0; i < nflags; i++) {
-               cap.args[0] = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
-                          BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
-       }
-}
-
-static void run_msr_filter_flag_test(struct kvm_vm *vm)
-{
-       u64 deny_bits = 0;
-       struct kvm_msr_filter filter = {
-               .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-               .ranges = {
-                       {
-                               .flags = KVM_MSR_FILTER_READ,
-                               .nmsrs = 1,
-                               .base = 0,
-                               .bitmap = (uint8_t *)&deny_bits,
-                       },
-               },
-       };
-       int nflags;
-       int rc;
-       int i;
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       nflags = sizeof(filter.flags) * BITS_PER_BYTE;
-       for (i = 0; i < nflags; i++) {
-               filter.flags = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
-                          BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
-       }
-
-       filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
-       nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
-       for (i = 0; i < nflags; i++) {
-               filter.ranges[0].flags = BIT_ULL(i);
-               test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
-                          BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
-       }
-}
-
-/* Test that attempts to write to the unused bits in a flag fail. */
-KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
-{
-       struct kvm_vm *vm = vcpu->vm;
-
-       /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
-       run_user_space_msr_flag_test(vm);
-
-       /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
-       run_msr_filter_flag_test(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
deleted file mode 100644 (file)
index a81a247..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_apic_access_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * The first subtest simply checks to see that an L2 guest can be
- * launched with a valid APIC-access address that is backed by a
- * page of L1 physical memory.
- *
- * The second subtest sets the APIC-access address to a (valid) L1
- * physical address that is not backed by memory. KVM can't handle
- * this situation, so resuming L2 should result in a KVM exit for
- * internal error (emulation). This is not an architectural
- * requirement. It is just a shortcoming of KVM. The internal error
- * is unfortunate, but it's better than what used to happen!
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-static void l2_guest_code(void)
-{
-       /* Exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
-       control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
-       vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
-
-       /* Try to launch L2 with the memory-backed APIC-access address. */
-       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       vmwrite(APIC_ACCESS_ADDR, high_gpa);
-
-       /* Try to resume L2 with the unbacked APIC-access address. */
-       GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
-       GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned long apic_access_addr = ~0ul;
-       vm_vaddr_t vmx_pages_gva;
-       unsigned long high_gpa;
-       struct vmx_pages *vmx;
-       bool done = false;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
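-       /*
-        * Use an L1 GPA near the top of the guest physical address space;
-        * it is not backed by a memslot.
-        */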
-       high_gpa = (vm->max_gfn - 1) << vm->page_shift;
-
-       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       prepare_virtualize_apic_accesses(vmx, vm);
-       vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
-
-       while (!done) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               if (apic_access_addr == high_gpa) {
-                       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-                       TEST_ASSERT(run->internal.suberror ==
-                                   KVM_INTERNAL_ERROR_EMULATION,
-                                   "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
-                                   run->internal.suberror);
-                       break;
-               }
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       apic_access_addr = uc.args[1];
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
-               }
-       }
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
deleted file mode 100644 (file)
index dad9883..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_close_while_nested
- *
- * Copyright (C) 2019, Red Hat, Inc.
- *
- * Verify that nothing bad happens if a KVM user exits with open
- * file descriptors while executing a nested guest.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-enum {
-       PORT_L0_EXIT = 0x2000,
-};
-
-static void l2_guest_code(void)
-{
-       /* Exit to L0 */
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (PORT_L0_EXIT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(0);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (;;) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               if (run->io.port == PORT_L0_EXIT)
-                       break;
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
deleted file mode 100644 (file)
index fa512d0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX            1
-#define TEST_MEM_PAGES                 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM                 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1               0xc0001000
-#define NESTED_TEST_MEM2               0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
-       READ_ONCE(*a);
-       WRITE_ONCE(*a, 1);
-       GUEST_SYNC(true);
-       GUEST_SYNC(false);
-
-       WRITE_ONCE(*b, 1);
-       GUEST_SYNC(true);
-       WRITE_ONCE(*b, 1);
-       GUEST_SYNC(true);
-       GUEST_SYNC(false);
-
-       /* Exit to L1 and never come back.  */
-       vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
-       l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
-       /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
-       l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       void *l2_rip;
-
-       GUEST_ASSERT(vmx->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-       GUEST_ASSERT(load_vmcs(vmx));
-
-       if (vmx->eptp_gpa)
-               l2_rip = l2_guest_code_ept_enabled;
-       else
-               l2_rip = l2_guest_code_ept_disabled;
-
-       prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       GUEST_SYNC(false);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_SYNC(false);
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
-       vm_vaddr_t vmx_pages_gva = 0;
-       struct vmx_pages *vmx;
-       unsigned long *bmap;
-       uint64_t *host_test_mem;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct ucall uc;
-       bool done = false;
-
-       pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       /* Add an extra memory slot for testing dirty logging */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   GUEST_TEST_MEM,
-                                   TEST_MEM_SLOT_INDEX,
-                                   TEST_MEM_PAGES,
-                                   KVM_MEM_LOG_DIRTY_PAGES);
-
-       /*
-        * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
-        * affects both L1 and L2.  However...
-        */
-       virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
-       /*
-        * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
-        * 0xc0000000.
-        *
-        * Note that prepare_eptp should be called only after L1's GPA map is
-        * done, meaning after the last call to virt_map.
-        *
-        * When EPT is disabled, the L2 guest code will still access the same L1
-        * GPAs as the EPT enabled case.
-        */
-       if (enable_ept) {
-               prepare_eptp(vmx, vm, 0);
-               nested_map_memslot(vmx, vm, 0);
-               nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
-               nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
-       }
-
-       bmap = bitmap_zalloc(TEST_MEM_PAGES);
-       host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
-       while (!done) {
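-               /*
-                * Fill the test memslot with a known pattern before each run
-                * so that a guest write (which sets the first u64 of the slot
-                * to 1) can be distinguished from untouched memory below.
-                */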
-               memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       /*
-                        * The nested guest wrote at offset 0x1000 in the memslot, but the
-                        * dirty bitmap must be filled in according to L1 GPA, not L2.
-                        */
-                       kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-                       if (uc.args[1]) {
-                               TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
-                               TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
-                       } else {
-                               TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
-                               TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
-                       }
-
-                       TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
-                       TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
-                       TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
-                       TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
-                       break;
-               case UCALL_DONE:
-                       done = true;
-                       break;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       test_vmx_dirty_log(/*enable_ept=*/false);
-
-       if (kvm_cpu_has_ept())
-               test_vmx_dirty_log(/*enable_ept=*/true);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
deleted file mode 100644 (file)
index 3fd6ece..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <signal.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-
-#include "kselftest.h"
-
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       /* Loop on the ud2 until guest state is made invalid. */
-}
-
-static void guest_code(void)
-{
-       asm volatile("ud2");
-}
-
-static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
-       struct kvm_run *run = vcpu->run;
-
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
-       TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-                   "Expected emulation failure, got %d",
-                   run->emulation_failure.suberror);
-}
-
-static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
-{
-       /*
-        * Always run twice to verify KVM handles the case where _KVM_ queues
-        * an exception with invalid state and then exits to userspace, i.e.
-        * that KVM doesn't explode if userspace ignores the initial error.
-        */
-       __run_vcpu_with_invalid_state(vcpu);
-       __run_vcpu_with_invalid_state(vcpu);
-}
-
-static void set_timer(void)
-{
-       struct itimerval timer;
-
-       timer.it_value.tv_sec  = 0;
-       timer.it_value.tv_usec = 200;
-       timer.it_interval = timer.it_value;
-       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
-}
-
-static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
-{
-       static struct kvm_sregs sregs;
-
-       if (!sregs.cr0)
-               vcpu_sregs_get(vcpu, &sregs);
-       sregs.tr.unusable = !!set;
-       vcpu_sregs_set(vcpu, &sregs);
-}
-
-static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
-       set_or_clear_invalid_guest_state(vcpu, true);
-}
-
-static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
-{
-       set_or_clear_invalid_guest_state(vcpu, false);
-}
-
-static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
-{
-       static struct kvm_vcpu *vcpu = NULL;
-
-       if (__vcpu)
-               vcpu = __vcpu;
-       return vcpu;
-}
-
-static void sigalrm_handler(int sig)
-{
-       struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
-       struct kvm_vcpu_events events;
-
-       TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
-
-       vcpu_events_get(vcpu, &events);
-
-       /*
-        * If an exception is pending, attempt KVM_RUN with invalid guest state,
-        * otherwise rearm the timer and keep doing so until the timer fires
-        * between KVM queueing an exception and re-entering the guest.
-        */
-       if (events.exception.pending) {
-               set_invalid_guest_state(vcpu);
-               run_vcpu_with_invalid_state(vcpu);
-       } else {
-               set_timer();
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(host_cpu_is_intel);
-       TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       get_set_sigalrm_vcpu(vcpu);
-
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
-       /*
-        * Stuff invalid guest state by making TR unusable.  The next KVM_RUN
-        * should exit to userspace with a KVM_INTERNAL_ERROR_EMULATION
-        * suberror, as asserted in run_vcpu_with_invalid_state().
-        */
-       set_invalid_guest_state(vcpu);
-       run_vcpu_with_invalid_state(vcpu);
-
-       /*
-        * Verify KVM also handles the case where userspace gains control while
-        * an exception is pending and stuffs invalid state.  Run with valid
-        * guest state and a timer firing every 200us, and attempt to enter the
-        * guest with invalid state when the handler interrupts KVM with an
-        * exception pending.
-        */
-       clear_invalid_guest_state(vcpu);
-       TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
-                   "Failed to register SIGALRM handler, errno = %d (%s)",
-                   errno, strerror(errno));
-
-       set_timer();
-       run_vcpu_with_invalid_state(vcpu);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
deleted file mode 100644 (file)
index a100ee5..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#define ARBITRARY_IO_PORT 0x2000
-
-static struct kvm_vm *vm;
-
-static void l2_guest_code(void)
-{
-       /*
-        * Generate an exit to L0 userspace, i.e. main(), via I/O to an
-        * arbitrary port.
-        */
-       asm volatile("inb %%dx, %%al"
-                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /*
-        * L2 must be run without unrestricted guest; verify that the selftests
-        * library hasn't enabled it.  Because KVM selftests jump directly to
-        * 64-bit mode, unrestricted guest support isn't required.
-        */
-       GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
-                    !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
-
-       GUEST_ASSERT(!vmlaunch());
-
-       /* L2 should triple fault after main() stuffs invalid guest state. */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       vcpu_run(vcpu);
-
-       run = vcpu->run;
-
-       /*
-        * The first exit to L0 userspace should be an I/O access from L2.
-        * Running L1 should launch L2 without triggering an exit to userspace.
-        */
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
-                   "Expected IN from port %d from L2, got port %d",
-                   ARBITRARY_IO_PORT, run->io.port);
-
-       /*
-        * Stuff invalid guest state for L2 by making TR unusable.  The next
-        * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
-        * emulating invalid guest state for L2.
-        */
-       memset(&sregs, 0, sizeof(sregs));
-       vcpu_sregs_get(vcpu, &sregs);
-       sregs.tr.unusable = 1;
-       vcpu_sregs_set(vcpu, &sregs);
-
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
deleted file mode 100644 (file)
index 90720b6..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX control MSR test
- *
- * Copyright (C) 2022 Google LLC.
- *
- * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
- * that KVM will set owned bits where appropriate, and will not if
- * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
- */
-#include <linux/bitmap.h>
-#include "kvm_util.h"
-#include "vmx.h"
-
-static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-                                 uint64_t mask)
-{
-       uint64_t val = vcpu_get_msr(vcpu, msr_index);
-       uint64_t bit;
-
-       mask &= val;
-
-       for_each_set_bit(bit, &mask, 64) {
-               vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
-               vcpu_set_msr(vcpu, msr_index, val);
-       }
-}
-
-static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-                               uint64_t mask)
-{
-       uint64_t val = vcpu_get_msr(vcpu, msr_index);
-       uint64_t bit;
-
-       mask = ~mask | val;
-
-       for_each_clear_bit(bit, &mask, 64) {
-               vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
-               vcpu_set_msr(vcpu, msr_index, val);
-       }
-}
-
-static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
-{
-       vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
-       vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
-}
-
-static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
-{
-       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
-       vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
-
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
-                           BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
-
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
-                           BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
-                           BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
-
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
-       vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
-       vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
-}
-
-static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
-                                           uint64_t msr_bit,
-                                           struct kvm_x86_cpu_feature feature)
-{
-       uint64_t val;
-
-       vcpu_clear_cpuid_feature(vcpu, feature);
-
-       val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
-       vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
-
-       if (!kvm_cpu_has(feature))
-               return;
-
-       vcpu_set_cpuid_feature(vcpu, feature);
-}
-
-static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
-{
-       uint64_t supported_bits = FEAT_CTL_LOCKED |
-                                 FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
-                                 FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
-                                 FEAT_CTL_SGX_LC_ENABLED |
-                                 FEAT_CTL_SGX_ENABLED |
-                                 FEAT_CTL_LMCE_ENABLED;
-       int bit, r;
-
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
-       __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
-
-       for_each_clear_bit(bit, &supported_bits, 64) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
-               TEST_ASSERT(r == 0,
-                           "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
-       }
-}
-
-int main(void)
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       /* No need to actually do KVM_RUN, thus no guest code. */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       vmx_save_restore_msrs_test(vcpu);
-       ia32_feature_control_msr_test(vcpu);
-
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
deleted file mode 100644 (file)
index 1759fa5..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_nested_tsc_scaling_test
- *
- * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
- *
- * This test case verifies that nested TSC scaling behaves as expected when
- * both L1 and L2 are scaled using different ratios. For this test we scale
- * L1 down and scale L2 up.
- */
-
-#include <time.h>
-
-#include "kvm_util.h"
-#include "vmx.h"
-#include "kselftest.h"
-
-/* L2 is scaled up (from L1's perspective) by this factor */
-#define L2_SCALE_FACTOR 4ULL
-
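-/*
- * The VMX TSC multiplier is a 64-bit fixed-point value with 48 fractional
- * bits, so shifting the integer scale factor left by 48 yields an exact
- * multiplier of L2_SCALE_FACTOR.  The TSC offset is an arbitrary negative
- * value, used to exercise offsetting alongside scaling.
- */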
-#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
-#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
-
-#define L2_GUEST_STACK_SIZE 64
-
-enum { USLEEP, UCHECK_L1, UCHECK_L2 };
-#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
-#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
-
-
-/*
- * This function checks whether the "actual" TSC frequency of a guest matches
- * its expected frequency. In order to account for delays in taking the TSC
- * measurements, a difference of 1% between the actual and the expected value
- * is tolerated.
- */
-static void compare_tsc_freq(uint64_t actual, uint64_t expected)
-{
-       uint64_t tolerance, thresh_low, thresh_high;
-
-       tolerance = expected / 100;
-       thresh_low = expected - tolerance;
-       thresh_high = expected + tolerance;
-
-       TEST_ASSERT(thresh_low < actual,
-               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
-               " but it actually is %"PRIu64,
-               thresh_low, thresh_high, actual);
-       TEST_ASSERT(thresh_high > actual,
-               "TSC freq is expected to be between %"PRIu64" and %"PRIu64
-               " but it actually is %"PRIu64,
-               thresh_low, thresh_high, actual);
-}
-
-static void check_tsc_freq(int level)
-{
-       uint64_t tsc_start, tsc_end, tsc_freq;
-
-       /*
-        * Reading the TSC twice with about a second's difference should give
-        * us an approximation of the TSC frequency from the guest's
-        * perspective. Now, this won't be completely accurate, but it should
-        * be good enough for the purposes of this test.
-        */
-       tsc_start = rdmsr(MSR_IA32_TSC);
-       GUEST_SLEEP(1);
-       tsc_end = rdmsr(MSR_IA32_TSC);
-
-       tsc_freq = tsc_end - tsc_start;
-
-       GUEST_CHECK(level, tsc_freq);
-}
-
-static void l2_guest_code(void)
-{
-       check_tsc_freq(UCHECK_L2);
-
-       /* exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-
-       /* check that L1's frequency looks alright before launching L2 */
-       check_tsc_freq(UCHECK_L1);
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* prepare the VMCS for L2 execution */
-       prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /* enable TSC offsetting and TSC scaling for L2 */
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-
-       control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
-       control |= SECONDARY_EXEC_TSC_SCALING;
-       vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
-
-       vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
-       vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
-       vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
-
-       /* launch L2 */
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       /* check that L1's frequency still looks good */
-       check_tsc_freq(UCHECK_L1);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       vm_vaddr_t vmx_pages_gva;
-
-       uint64_t tsc_start, tsc_end;
-       uint64_t tsc_khz;
-       uint64_t l1_scale_factor;
-       uint64_t l0_tsc_freq = 0;
-       uint64_t l1_tsc_freq = 0;
-       uint64_t l2_tsc_freq = 0;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
-       TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
-
-       /*
-        * We set L1's scale factor to be a random number from 2 to 10.
-        * Ideally we would do the same for L2's factor but that one is
-        * referenced by both main() and l1_guest_code() and using a global
-        * variable does not work.
-        */
-       srand(time(NULL));
-       l1_scale_factor = (rand() % 9) + 2;
-       printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
-       printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
-
-       tsc_start = rdtsc();
-       sleep(1);
-       tsc_end = rdtsc();
-
-       l0_tsc_freq = tsc_end - tsc_start;
-       printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
-
-       vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
-       TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
-
-       /* scale down L1's TSC frequency */
-       vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-               case UCALL_SYNC:
-                       switch (uc.args[0]) {
-                       case USLEEP:
-                               sleep(uc.args[1]);
-                               break;
-                       case UCHECK_L1:
-                               l1_tsc_freq = uc.args[1];
-                               printf("L1's TSC frequency is around: %"PRIu64
-                                      "\n", l1_tsc_freq);
-
-                               compare_tsc_freq(l1_tsc_freq,
-                                                l0_tsc_freq / l1_scale_factor);
-                               break;
-                       case UCHECK_L2:
-                               l2_tsc_freq = uc.args[1];
-                               printf("L2's TSC frequency is around: %"PRIu64
-                                      "\n", l2_tsc_freq);
-
-                               compare_tsc_freq(l2_tsc_freq,
-                                                l1_tsc_freq * L2_SCALE_FACTOR);
-                               break;
-                       }
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
deleted file mode 100644 (file)
index a1f5ff4..0000000
+++ /dev/null
@@ -1,247 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test for VMX-pmu perf capability msr
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Test to check the effect of various CPUID settings on the
- * MSR_IA32_PERF_CAPABILITIES MSR, verify that what we write with KVM_SET_MSR
- * is _not_ modified by the guest and can be retrieved with KVM_GET_MSR, and
- * verify that invalid LBR formats are rejected.
- */
-#include <sys/ioctl.h>
-
-#include <linux/bitmap.h>
-
-#include "kvm_test_harness.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-static union perf_capabilities {
-       struct {
-               u64     lbr_format:6;
-               u64     pebs_trap:1;
-               u64     pebs_arch_reg:1;
-               u64     pebs_format:4;
-               u64     smm_freeze:1;
-               u64     full_width_write:1;
-               u64     pebs_baseline:1;
-               u64     perf_metrics:1;
-               u64     pebs_output_pt_available:1;
-               u64     anythread_deprecated:1;
-       };
-       u64     capabilities;
-} host_cap;
-
-/*
- * The LBR format and most PEBS features are immutable, all other features are
- * fungible (if supported by the host and KVM).
- */
-static const union perf_capabilities immutable_caps = {
-       .lbr_format = -1,
-       .pebs_trap  = 1,
-       .pebs_arch_reg = 1,
-       .pebs_format = -1,
-       .pebs_baseline = 1,
-};
-
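-/*
- * Mask covering just the multi-bit LBR and PEBS format fields, which are
- * validated separately from the other immutable bits (see the "immutable"
- * test below).
- */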
-static const union perf_capabilities format_caps = {
-       .lbr_format = -1,
-       .pebs_format = -1,
-};
-
-static void guest_test_perf_capabilities_gp(uint64_t val)
-{
-       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
-
-       __GUEST_ASSERT(vector == GP_VECTOR,
-                      "Expected #GP for value '0x%lx', got vector '0x%x'",
-                      val, vector);
-}
-
-static void guest_code(uint64_t current_val)
-{
-       int i;
-
-       guest_test_perf_capabilities_gp(current_val);
-       guest_test_perf_capabilities_gp(0);
-
-       for (i = 0; i < 64; i++)
-               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
-
-       GUEST_DONE();
-}
-
-KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
-
-/*
- * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
- * written, that the guest always sees the userspace controlled value, and that
- * PERF_CAPABILITIES is immutable after KVM_RUN.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
-{
-       struct ucall uc;
-       int r, i;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       vcpu_args_set(vcpu, 1, host_cap.capabilities);
-       vcpu_run(vcpu);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_ABORT:
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       case UCALL_DONE:
-               break;
-       default:
-               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
-       }
-
-       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
-                       host_cap.capabilities);
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-       TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
-
-       for (i = 0; i < 64; i++) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                                 host_cap.capabilities ^ BIT_ULL(i));
-               TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx' didn't fail",
-                           host_cap.capabilities ^ BIT_ULL(i));
-       }
-}
-
-/*
- * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
- * enabled, as well as '0' (to disable all features).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
-{
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
-{
-       const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
-       int bit;
-
-       for_each_set_bit(bit, &fungible_caps, 64) {
-               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
-               vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                            host_cap.capabilities & ~BIT_ULL(bit));
-       }
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-}
-
-/*
- * Verify KVM rejects attempts to set unsupported and/or immutable features in
- * PERF_CAPABILITIES.  Note, LBR format and PEBS format need to be validated
- * separately as they are multi-bit values, e.g. toggling or setting a single
- * bit can generate a false positive without dedicated safeguards.
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
-{
-       const uint64_t reserved_caps = (~host_cap.capabilities |
-                                       immutable_caps.capabilities) &
-                                      ~format_caps.capabilities;
-       union perf_capabilities val = host_cap;
-       int r, bit;
-
-       for_each_set_bit(bit, &reserved_caps, 64) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
-                                 host_cap.capabilities ^ BIT_ULL(bit));
-               TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
-                           host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
-                           BIT_ULL(bit), bit);
-       }
-
-       /*
-        * KVM only supports the host's native LBR format, as well as '0' (to
-        * disable LBR support).  Verify KVM rejects all other LBR formats.
-        */
-       for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
-               if (val.lbr_format == host_cap.lbr_format)
-                       continue;
-
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
-               TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
-                           val.lbr_format, host_cap.lbr_format);
-       }
-
-       /* Ditto for the PEBS format. */
-       for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
-               if (val.pebs_format == host_cap.pebs_format)
-                       continue;
-
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
-               TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
-                           val.pebs_format, host_cap.pebs_format);
-       }
-}
-
-/*
- * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
- * disabling the vPMU via CPUID also disables LBR support.  Set bits 2:0 of
- * LBR_TOS as those bits are writable across all uarch implementations (arch
- * LBRs will need to poke a different MSR).
- */
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
-{
-       int r;
-
-       if (!host_cap.lbr_format)
-               return;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-       vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
-
-       vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
-
-       r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
-       TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
-}
-
-KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
-{
-       uint64_t val;
-       int i, r;
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
-       TEST_ASSERT_EQ(val, host_cap.capabilities);
-
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
-
-       val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
-       TEST_ASSERT_EQ(val, 0);
-
-       vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
-
-       for (i = 0; i < 64; i++) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
-               TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
-                           i, BIT_ULL(i));
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       TEST_REQUIRE(kvm_is_pmu_enabled());
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
-
-       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
-       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
-
-       host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
-
-       TEST_ASSERT(host_cap.full_width_write,
-                   "Full-width writes should always be supported");
-
-       return test_harness_run(argc, argv);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
deleted file mode 100644 (file)
index 00dd2ac..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VMX-preemption timer test
- *
- * Copyright (C) 2020, Google, LLC.
- *
- * Test to ensure that VM-Enter after migration doesn't
- * incorrectly restart the timer with the full timer
- * value instead of the partially decayed timer value.
- *
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#define PREEMPTION_TIMER_VALUE                 100000000ull
-#define PREEMPTION_TIMER_VALUE_THRESHOLD1       80000000ull
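-
-/*
- * L2 spins until at least THRESHOLD1 ticks (80% of the programmed timer
- * value) have elapsed before requesting the save/restore cycle, so the
- * preemption timer is partially decayed at that point.
- */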
-
-u32 vmx_pt_rate;
-bool l2_save_restore_done;
-static u64 l2_vmx_pt_start;
-volatile u64 l2_vmx_pt_finish;
-
-union vmx_basic basic;
-union vmx_ctrl_msr ctrl_pin_rev;
-union vmx_ctrl_msr ctrl_exit_rev;
-
-void l2_guest_code(void)
-{
-       u64 vmx_pt_delta;
-
-       vmcall();
-       l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
-       /*
-        * Wait until the 1st threshold has passed
-        */
-       do {
-               l2_vmx_pt_finish = rdtsc();
-               vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
-                               vmx_pt_rate;
-       } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
-
-       /*
-        * Force L2 through Save and Restore cycle
-        */
-       GUEST_SYNC(1);
-
-       l2_save_restore_done = 1;
-
-       /*
-        * Now wait for the preemption timer to fire and
-        * exit to L1
-        */
-       while ((l2_vmx_pt_finish = rdtsc()))
-               ;
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       u64 l1_vmx_pt_start;
-       u64 l1_vmx_pt_finish;
-       u64 l1_tsc_deadline, l2_tsc_deadline;
-
-       GUEST_ASSERT(vmx_pages->vmcs_gpa);
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-       GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
-
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-       /*
-        * Check for Preemption timer support
-        */
-       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
-       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
-                       : MSR_IA32_VMX_PINBASED_CTLS);
-       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
-                       : MSR_IA32_VMX_EXIT_CTLS);
-
-       if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
-           !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
-               return;
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-       vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
-
-       /*
-        * Turn on PIN control and resume the guest
-        */
-       GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
-                             vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
-                             PIN_BASED_VMX_PREEMPTION_TIMER));
-
-       GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
-                             PREEMPTION_TIMER_VALUE));
-
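-       /*
-        * Bits 4:0 of IA32_VMX_MISC report the preemption timer rate: the
-        * timer counts down by one each time bit N of the TSC changes, i.e.
-        * once per 2^N TSC cycles.  The shifts by vmx_pt_rate round TSC reads
-        * down to that granularity.
-        */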
-       vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
-
-       l2_save_restore_done = 0;
-
-       l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
-
-       GUEST_ASSERT(!vmresume());
-
-       l1_vmx_pt_finish = rdtsc();
-
-       /*
-        * Ensure exit from L2 happens after L2 goes through
-        * save and restore
-        */
-       GUEST_ASSERT(l2_save_restore_done);
-
-       /*
-        * Ensure the exit from L2 is due to preemption timer expiry
-        */
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
-
-       l1_tsc_deadline = l1_vmx_pt_start +
-               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
-       l2_tsc_deadline = l2_vmx_pt_start +
-               (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
-
-       /*
-        * Sync with the host and pass the l1|l2 pt_expiry_finish times and
-        * tsc deadlines so that host can verify they are as expected
-        */
-       GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
-               l2_vmx_pt_finish, l2_tsc_deadline);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
-       if (vmx_pages)
-               l1_guest_code(vmx_pages);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva = 0;
-
-       struct kvm_regs regs1, regs2;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       struct kvm_x86_state *state;
-       struct ucall uc;
-       int stage;
-
-       /*
-        * AMD currently does not implement any VMX features, so for now we
-        * just early out.
-        */
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       vcpu_regs_get(vcpu, &regs1);
-
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (stage = 1;; stage++) {
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-
-               /* UCALL_SYNC is handled here.  */
-               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
-                           uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
-                           stage, (ulong)uc.args[1]);
-               /*
-                * If this is stage 2, then we should verify that the vmx pt
-                * expiry is as expected.
-                * From L1's perspective, verify the Preemption timer hasn't
-                * expired too early.
-                * From L2's perspective, verify the Preemption timer hasn't
-                * expired too late.
-                */
-               if (stage == 2) {
-
-                       pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
-                               stage, uc.args[2], uc.args[3]);
-
-                       pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
-                               stage, uc.args[4], uc.args[5]);
-
-                       TEST_ASSERT(uc.args[2] >= uc.args[3],
-                               "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
-                               stage, uc.args[2], uc.args[3]);
-
-                       TEST_ASSERT(uc.args[4] < uc.args[5],
-                               "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
-                               stage, uc.args[4], uc.args[5]);
-               }
-
-               state = vcpu_save_state(vcpu);
-               memset(&regs1, 0, sizeof(regs1));
-               vcpu_regs_get(vcpu, &regs1);
-
-               kvm_vm_release(vm);
-
-               /* Restore state in a new VM.  */
-               vcpu = vm_recreate_with_one_vcpu(vm);
-               vcpu_load_state(vcpu, state);
-               kvm_x86_state_cleanup(state);
-
-               memset(&regs2, 0, sizeof(regs2));
-               vcpu_regs_get(vcpu, &regs2);
-               TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-                           "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-                           (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
-
-done:
-       kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
deleted file mode 100644 (file)
index 67a62a5..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_set_nested_state_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <errno.h>
-#include <linux/kvm.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-
-/*
- * Mirror of VMCS12_REVISION in arch/x86/kvm/vmx/vmcs12.h. If that value
- * changes this should be updated.
- */
-#define VMCS12_REVISION 0x11e57ed0
-
-bool have_evmcs;
-
-void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
-{
-       vcpu_nested_state_set(vcpu, state);
-}
-
-void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
-                                   struct kvm_nested_state *state,
-                                   int expected_errno)
-{
-       int rv;
-
-       rv = __vcpu_nested_state_set(vcpu, state);
-       TEST_ASSERT(rv == -1 && errno == expected_errno,
-               "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
-               strerror(expected_errno), expected_errno, rv, strerror(errno),
-               errno);
-}
-
-void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
-                                    struct kvm_nested_state *state)
-{
-       test_nested_state_expect_errno(vcpu, state, EINVAL);
-}
-
-void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
-                                    struct kvm_nested_state *state)
-{
-       test_nested_state_expect_errno(vcpu, state, EFAULT);
-}
-
-void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
-                               u32 vmcs12_revision)
-{
-       /* Set revision_id in vmcs12 to vmcs12_revision. */
-       memcpy(&state->data, &vmcs12_revision, sizeof(u32));
-}
-
-void set_default_state(struct kvm_nested_state *state)
-{
-       memset(state, 0, sizeof(*state));
-       state->flags = KVM_STATE_NESTED_RUN_PENDING |
-                      KVM_STATE_NESTED_GUEST_MODE;
-       state->format = 0;
-       state->size = sizeof(*state);
-}
-
-void set_default_vmx_state(struct kvm_nested_state *state, int size)
-{
-       memset(state, 0, size);
-       if (have_evmcs)
-               state->flags = KVM_STATE_NESTED_EVMCS;
-       state->format = 0;
-       state->size = size;
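-       /* Arbitrary, page-aligned GPAs for the VMXON region and vmcs12. */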
-       state->hdr.vmx.vmxon_pa = 0x1000;
-       state->hdr.vmx.vmcs12_pa = 0x2000;
-       state->hdr.vmx.smm.flags = 0;
-       set_revision_id_for_vmcs12(state, VMCS12_REVISION);
-}
-
-void test_vmx_nested_state(struct kvm_vcpu *vcpu)
-{
-       /* Add a page for VMCS12. */
-       const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
-       struct kvm_nested_state *state =
-               (struct kvm_nested_state *)malloc(state_sz);
-
-       /* The format must be set to 0. 0 for VMX, 1 for SVM. */
-       set_default_vmx_state(state, state_sz);
-       state->format = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * We cannot virtualize anything if the guest does not have VMX
-        * enabled.
-        */
-       set_default_vmx_state(state, state_sz);
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * We cannot virtualize anything if the guest does not have VMX
-        * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
-        * is set to -1ull, but the flags must be zero.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       test_nested_state_expect_einval(vcpu, state);
-
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       state->flags = KVM_STATE_NESTED_EVMCS;
-       test_nested_state_expect_einval(vcpu, state);
-
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-
-       /* Enable VMX in the guest CPUID. */
-       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
-       /*
-        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
-        * setting the nested state. When the eVMCS flag is not set, the
-        * expected return value is '0'.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       test_nested_state(vcpu, state);
-
-       /*
-        * When eVMCS is supported, the eVMCS flag can only be set if the
-        * enlightened VMCS capability has been enabled.
-        */
-       if (have_evmcs) {
-               state->flags = KVM_STATE_NESTED_EVMCS;
-               test_nested_state_expect_einval(vcpu, state);
-               vcpu_enable_evmcs(vcpu);
-               test_nested_state(vcpu, state);
-       }
-
-       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
-       state->hdr.vmx.smm.flags = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* Invalid flags are rejected. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.flags = ~0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->flags = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* It is invalid to have vmxon_pa set to a non-page aligned address. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = 1;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
-        * KVM_STATE_NESTED_GUEST_MODE set together.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = KVM_STATE_NESTED_GUEST_MODE  |
-                     KVM_STATE_NESTED_RUN_PENDING;
-       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * It is invalid to have any of the SMM flags set besides:
-        *      KVM_STATE_NESTED_SMM_GUEST_MODE
-        *      KVM_STATE_NESTED_SMM_VMXON
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
-                               KVM_STATE_NESTED_SMM_VMXON);
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* Outside SMM, SMM flags must be zero. */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * Size must be large enough to fit kvm_nested_state and vmcs12
-        * if the VMCS12 physical address is set.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       state->hdr.vmx.vmcs12_pa = -1;
-       test_nested_state(vcpu, state);
-
-       /*
-        * KVM_SET_NESTED_STATE succeeds with invalid VMCS
-        * contents but L2 not running.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-
-       /* Invalid flags are rejected, even if no VMCS loaded. */
-       set_default_vmx_state(state, state_sz);
-       state->size = sizeof(*state);
-       state->flags = 0;
-       state->hdr.vmx.vmcs12_pa = -1;
-       state->hdr.vmx.flags = ~0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /* vmxon_pa cannot be the same address as vmcs_pa. */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = 0;
-       state->hdr.vmx.vmcs12_pa = 0;
-       test_nested_state_expect_einval(vcpu, state);
-
-       /*
-        * Test that if we leave nesting the state reflects that when we get
-        * it again.
-        */
-       set_default_vmx_state(state, state_sz);
-       state->hdr.vmx.vmxon_pa = -1ull;
-       state->hdr.vmx.vmcs12_pa = -1ull;
-       state->flags = 0;
-       test_nested_state(vcpu, state);
-       vcpu_nested_state_get(vcpu, state);
-       TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
-                   "Size must be between %ld and %d.  The size returned was %d.",
-                   sizeof(*state), state_sz, state->size);
-       TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
-       TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
-
-       free(state);
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-       struct kvm_nested_state state;
-       struct kvm_vcpu *vcpu;
-
-       have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
-
-       TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
-
-       /*
-        * AMD currently does not implement set_nested_state, so for now we
-        * just early out.
-        */
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       /*
-        * First run tests with VMX disabled to check error handling.
-        */
-       vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
-
-       /* Passing a NULL kvm_nested_state causes a EFAULT. */
-       test_nested_state_expect_efault(vcpu, NULL);
-
-       /* 'size' cannot be smaller than sizeof(kvm_nested_state). */
-       set_default_state(&state);
-       state.size = 0;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       /*
-        * Setting the flags 0xf fails the flags check.  The only flags that
-        * can be used are:
-        *     KVM_STATE_NESTED_GUEST_MODE
-        *     KVM_STATE_NESTED_RUN_PENDING
-        *     KVM_STATE_NESTED_EVMCS
-        */
-       set_default_state(&state);
-       state.flags = 0xf;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       /*
-        * If KVM_STATE_NESTED_RUN_PENDING is set then
-        * KVM_STATE_NESTED_GUEST_MODE has to be set as well.
-        */
-       set_default_state(&state);
-       state.flags = KVM_STATE_NESTED_RUN_PENDING;
-       test_nested_state_expect_einval(vcpu, &state);
-
-       test_vmx_nested_state(vcpu);
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
deleted file mode 100644 (file)
index 2ceb5c7..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * vmx_tsc_adjust_test
- *
- * Copyright (C) 2018, Google LLC.
- *
- * IA32_TSC_ADJUST test
- *
- * According to the SDM, "if an execution of WRMSR to the
- * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
- * the logical processor also adds (or subtracts) value X from the
- * IA32_TSC_ADJUST MSR."
- *
- * Note that when L1 doesn't intercept writes to IA32_TSC, a
- * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
- * value.
- *
- * This test verifies that this unusual case is handled correctly.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "kselftest.h"
-
-#ifndef MSR_IA32_TSC_ADJUST
-#define MSR_IA32_TSC_ADJUST 0x3b
-#endif
-
-#define TSC_ADJUST_VALUE (1ll << 32)
-#define TSC_OFFSET_VALUE -(1ll << 48)
-
-enum {
-       PORT_ABORT = 0x1000,
-       PORT_REPORT,
-       PORT_DONE,
-};
-
-enum {
-       VMXON_PAGE = 0,
-       VMCS_PAGE,
-       MSR_BITMAP_PAGE,
-
-       NUM_VMX_PAGES,
-};
-
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
-static void check_ia32_tsc_adjust(int64_t max)
-{
-       int64_t adjust;
-
-       adjust = rdmsr(MSR_IA32_TSC_ADJUST);
-       GUEST_SYNC(adjust);
-       GUEST_ASSERT(adjust <= max);
-}
-
-static void l2_guest_code(void)
-{
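-       /*
-        * L2 runs with a TSC offset of TSC_OFFSET_VALUE (see l1_guest_code()),
-        * so subtracting the offset from RDTSC yields L1's view of the TSC.
-        * L1 doesn't intercept IA32_TSC writes, so the WRMSR below modifies
-        * L1's TSC and thus drops IA32_TSC_ADJUST by another TSC_ADJUST_VALUE
-        * (verified below as -2 * TSC_ADJUST_VALUE).
-        */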
-       uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
-
-       wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
-       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
-       /* Exit to L1 */
-       __asm__ __volatile__("vmcall");
-}
-
-static void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
-       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-       uint32_t control;
-       uintptr_t save_cr3;
-
-       GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
-       wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
-       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-
-       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-       GUEST_ASSERT(load_vmcs(vmx_pages));
-
-       /* Prepare the VMCS for L2 execution. */
-       prepare_vmcs(vmx_pages, l2_guest_code,
-                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-       control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-       control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
-       vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-       vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
-       /* Jump into L2.  First, test failure to load guest CR3.  */
-       save_cr3 = vmreadz(GUEST_CR3);
-       vmwrite(GUEST_CR3, -1ull);
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
-                    (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
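-       /* The failed VM-Entry should not have affected IA32_TSC_ADJUST. */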
-       check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-       vmwrite(GUEST_CR3, save_cr3);
-
-       GUEST_ASSERT(!vmlaunch());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-
-       check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
-       GUEST_DONE();
-}
-
-static void report(int64_t val)
-{
-       pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
-               val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
-}
-
-int main(int argc, char *argv[])
-{
-       vm_vaddr_t vmx_pages_gva;
-       struct kvm_vcpu *vcpu;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-       vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
-
-       /* Allocate VMX pages and shared descriptors (vmx_pages). */
-       vcpu_alloc_vmx(vm, &vmx_pages_gva);
-       vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       report(uc.args[1]);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
deleted file mode 100644 (file)
index a76078a..0000000
+++ /dev/null
@@ -1,487 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * xapic_ipi_test
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
- * another vCPU that is halted when KVM's backing page for the APIC access
- * address has been moved by mm.
- *
- * The test starts two vCPUs: one that sends IPIs and one that continually
- * executes HLT. The sender checks that the halter has woken from the HLT and
- * has reentered HLT before sending the next IPI. While the vCPUs are running,
- * the host continually calls migrate_pages to move all of the process' pages
- * amongst the available numa nodes on the machine.
- *
- * Migration is a command line option. When used on non-numa machines, the
- * test will exit with an error. The test is still useful on non-numa
- * machines for testing IPIs.
- */
-#include <getopt.h>
-#include <pthread.h>
-#include <inttypes.h>
-#include <string.h>
-#include <time.h>
-
-#include "kvm_util.h"
-#include "numaif.h"
-#include "processor.h"
-#include "test_util.h"
-#include "vmx.h"
-
-/* Default running time for the test */
-#define DEFAULT_RUN_SECS 3
-
-/* Default delay between migrate_pages calls (microseconds) */
-#define DEFAULT_DELAY_USECS 500000
-
-/*
- * Vector for IPI from sender vCPU to halting vCPU.
- * Value is arbitrary and was chosen for the alternating bit pattern. Any
- * value should work.
- */
-#define IPI_VECTOR      0xa5
-
-/*
- * Incremented in the IPI handler. Provides evidence to the sender that the IPI
- * arrived at the destination.
- */
-static volatile uint64_t ipis_rcvd;
-
-/* Data struct shared between host main thread and vCPUs */
-struct test_data_page {
-       uint32_t halter_apic_id;
-       volatile uint64_t hlt_count;
-       volatile uint64_t wake_count;
-       uint64_t ipis_sent;
-       uint64_t migrations_attempted;
-       uint64_t migrations_completed;
-       uint32_t icr;
-       uint32_t icr2;
-       uint32_t halter_tpr;
-       uint32_t halter_ppr;
-
-       /*
-        *  Record local version register as a cross-check that APIC access
-        *  worked. Value should match what KVM reports (APIC_VERSION in
-        *  arch/x86/kvm/lapic.c). If test is failing, check that values match
-        *  to determine whether APIC access exits are working.
-        */
-       uint32_t halter_lvr;
-};
-
-struct thread_params {
-       struct test_data_page *data;
-       struct kvm_vcpu *vcpu;
-       uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
-};
-
-void verify_apic_base_addr(void)
-{
-       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-       uint64_t base = GET_APIC_BASE(msr);
-
-       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
-}
-
-static void halter_guest_code(struct test_data_page *data)
-{
-       verify_apic_base_addr();
-       xapic_enable();
-
-       data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
-       data->halter_lvr = xapic_read_reg(APIC_LVR);
-
-       /*
-        * Loop forever HLTing and recording halts & wakes. Disable interrupts
-        * each time around to minimize window between signaling the pending
-        * halt to the sender vCPU and executing the halt. No need to disable on
-        * first run as this vCPU executes first and the host waits for it to
-        * signal going into first halt before starting the sender vCPU. Record
-        * TPR and PPR for diagnostic purposes in case the test fails.
-        */
-       for (;;) {
-               data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
-               data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
-               data->hlt_count++;
-               asm volatile("sti; hlt; cli");
-               data->wake_count++;
-       }
-}
-
-/*
- * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
- * enable diagnosing errant writes to the APIC access address backing page in
- * case of test failure.
- */
-static void guest_ipi_handler(struct ex_regs *regs)
-{
-       ipis_rcvd++;
-       xapic_write_reg(APIC_EOI, 77);
-}
-
-static void sender_guest_code(struct test_data_page *data)
-{
-       uint64_t last_wake_count;
-       uint64_t last_hlt_count;
-       uint64_t last_ipis_rcvd_count;
-       uint32_t icr_val;
-       uint32_t icr2_val;
-       uint64_t tsc_start;
-
-       verify_apic_base_addr();
-       xapic_enable();
-
-       /*
-        * Init interrupt command register for sending IPIs
-        *
-        * Delivery mode=fixed, per SDM:
-        *   "Delivers the interrupt specified in the vector field to the target
-        *    processor."
-        *
-        * Destination mode=physical i.e. specify target by its local APIC
-        * ID. This vCPU assumes that the halter vCPU has already started and
-        * set data->halter_apic_id.
-        */
-       icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
-       icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
-       data->icr = icr_val;
-       data->icr2 = icr2_val;
-
-       last_wake_count = data->wake_count;
-       last_hlt_count = data->hlt_count;
-       last_ipis_rcvd_count = ipis_rcvd;
-       for (;;) {
-               /*
-                * Send IPI to halter vCPU.
-                * First IPI can be sent unconditionally because halter vCPU
-                * starts earlier.
-                */
-               xapic_write_reg(APIC_ICR2, icr2_val);
-               xapic_write_reg(APIC_ICR, icr_val);
-               data->ipis_sent++;
-
-               /*
-                * Wait up to ~1 sec for halter to indicate that it has:
-                * 1. Received the IPI
-                * 2. Woken up from the halt
-                * 3. Gone back into halt
-                * Current CPUs typically run at 2.x GHz, which is ~2
-                * billion ticks per second.
-                */
-               tsc_start = rdtsc();
-               while (rdtsc() - tsc_start < 2000000000) {
-                       if ((ipis_rcvd != last_ipis_rcvd_count) &&
-                           (data->wake_count != last_wake_count) &&
-                           (data->hlt_count != last_hlt_count))
-                               break;
-               }
-
-               GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
-                            (data->wake_count != last_wake_count) &&
-                            (data->hlt_count != last_hlt_count));
-
-               last_wake_count = data->wake_count;
-               last_hlt_count = data->hlt_count;
-               last_ipis_rcvd_count = ipis_rcvd;
-       }
-}
-
-static void *vcpu_thread(void *arg)
-{
-       struct thread_params *params = (struct thread_params *)arg;
-       struct kvm_vcpu *vcpu = params->vcpu;
-       struct ucall uc;
-       int old;
-       int r;
-
-       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
-       TEST_ASSERT(r == 0,
-                   "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
-                   vcpu->id, r);
-
-       fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
-       vcpu_run(vcpu);
-
-       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-       if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
-               TEST_ASSERT(false,
-                           "vCPU %u exited with error: %s.\n"
-                           "Sending vCPU sent %lu IPIs to halting vCPU\n"
-                           "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
-                           "Halter TPR=%#x PPR=%#x LVR=%#x\n"
-                           "Migrations attempted: %lu\n"
-                           "Migrations completed: %lu",
-                           vcpu->id, (const char *)uc.args[0],
-                           params->data->ipis_sent, params->data->hlt_count,
-                           params->data->wake_count,
-                           *params->pipis_rcvd, params->data->halter_tpr,
-                           params->data->halter_ppr, params->data->halter_lvr,
-                           params->data->migrations_attempted,
-                           params->data->migrations_completed);
-       }
-
-       return NULL;
-}
-
-static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
-{
-       void *retval;
-       int r;
-
-       r = pthread_cancel(thread);
-       TEST_ASSERT(r == 0,
-                   "pthread_cancel on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-
-       r = pthread_join(thread, &retval);
-       TEST_ASSERT(r == 0,
-                   "pthread_join on vcpu_id=%d failed with errno=%d",
-                   vcpu->id, r);
-       TEST_ASSERT(retval == PTHREAD_CANCELED,
-                   "expected retval=%p, got %p", PTHREAD_CANCELED,
-                   retval);
-}
-
-void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
-                  uint64_t *pipis_rcvd)
-{
-       long pages_not_moved;
-       unsigned long nodemask = 0;
-       unsigned long nodemasks[sizeof(nodemask) * 8];
-       int nodes = 0;
-       time_t start_time, last_update, now;
-       time_t interval_secs = 1;
-       int i, r;
-       int from, to;
-       unsigned long bit;
-       uint64_t hlt_count;
-       uint64_t wake_count;
-       uint64_t ipis_sent;
-
-       fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
-               delay_usecs);
-
-       /* Get set of first 64 numa nodes available */
-       r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
-                         0, MPOL_F_MEMS_ALLOWED);
-       TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
-
-       fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
-               "(each 1-bit indicates node is present): %#lx\n",
-               sizeof(nodemask) * 8, nodemask);
-
-       /* Init array of masks containing a single-bit in each, one for each
-        * available node. migrate_pages called below requires specifying nodes
-        * as bit masks.
-        */
-       for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
-               if (nodemask & bit) {
-                       nodemasks[nodes] = nodemask & bit;
-                       nodes++;
-               }
-       }
-
-       TEST_ASSERT(nodes > 1,
-                   "Did not find at least 2 numa nodes. Can't do migration");
-
-       fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
-
-       from = 0;
-       to = 1;
-       start_time = time(NULL);
-       last_update = start_time;
-
-       ipis_sent = data->ipis_sent;
-       hlt_count = data->hlt_count;
-       wake_count = data->wake_count;
-
-       while ((int)(time(NULL) - start_time) < run_secs) {
-               data->migrations_attempted++;
-
-               /*
-                * migrate_pages with PID=0 will migrate all pages of this
-                * process between the nodes specified as bitmasks. The page
-                * backing the APIC access address belongs to this process
-                * because it is allocated by KVM in the context of the
-                * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
-                * test may break or give a false positive signal.
-                */
-               pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
-                                               &nodemasks[from],
-                                               &nodemasks[to]);
-               if (pages_not_moved < 0)
-                       fprintf(stderr,
-                               "migrate_pages failed, errno=%d\n", errno);
-               else if (pages_not_moved > 0)
-                       fprintf(stderr,
-                               "migrate_pages could not move %ld pages\n",
-                               pages_not_moved);
-               else
-                       data->migrations_completed++;
-
-               from = to;
-               to++;
-               if (to == nodes)
-                       to = 0;
-
-               now = time(NULL);
-               if (((now - start_time) % interval_secs == 0) &&
-                   (now != last_update)) {
-                       last_update = now;
-                       fprintf(stderr,
-                               "%lu seconds: Migrations attempted=%lu completed=%lu, "
-                               "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
-                               now - start_time, data->migrations_attempted,
-                               data->migrations_completed,
-                               data->ipis_sent, *pipis_rcvd,
-                               data->hlt_count, data->wake_count);
-
-                       TEST_ASSERT(ipis_sent != data->ipis_sent &&
-                                   hlt_count != data->hlt_count &&
-                                   wake_count != data->wake_count,
-                                   "IPI, HLT and wake count have not increased "
-                                   "in the last %lu seconds. "
-                                   "HLTer is likely hung.", interval_secs);
-
-                       ipis_sent = data->ipis_sent;
-                       hlt_count = data->hlt_count;
-                       wake_count = data->wake_count;
-               }
-               usleep(delay_usecs);
-       }
-}
-
-void get_cmdline_args(int argc, char *argv[], int *run_secs,
-                     bool *migrate, int *delay_usecs)
-{
-       for (;;) {
-               int opt = getopt(argc, argv, "s:d:m");
-
-               if (opt == -1)
-                       break;
-               switch (opt) {
-               case 's':
-                       *run_secs = parse_size(optarg);
-                       break;
-               case 'm':
-                       *migrate = true;
-                       break;
-               case 'd':
-                       *delay_usecs = parse_size(optarg);
-                       break;
-               default:
-                       TEST_ASSERT(false,
-                                   "Usage: -s <runtime seconds>. Default is %d seconds.\n"
-                                   "-m adds calls to migrate_pages while vCPUs are running."
-                                   " Default is no migrations.\n"
-                                   "-d <delay microseconds> - delay between migrate_pages() calls."
-                                   " Default is %d microseconds.",
-                                   DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
-               }
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       int r;
-       int wait_secs;
-       const int max_halter_wait = 10;
-       int run_secs = 0;
-       int delay_usecs = 0;
-       struct test_data_page *data;
-       vm_vaddr_t test_data_page_vaddr;
-       bool migrate = false;
-       pthread_t threads[2];
-       struct thread_params params[2];
-       struct kvm_vm *vm;
-       uint64_t *pipis_rcvd;
-
-       get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
-       if (run_secs <= 0)
-               run_secs = DEFAULT_RUN_SECS;
-       if (delay_usecs <= 0)
-               delay_usecs = DEFAULT_DELAY_USECS;
-
-       vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
-
-       vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
-       params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
-
-       test_data_page_vaddr = vm_vaddr_alloc_page(vm);
-       data = addr_gva2hva(vm, test_data_page_vaddr);
-       memset(data, 0, sizeof(*data));
-       params[0].data = data;
-       params[1].data = data;
-
-       vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
-       vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
-
-       pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
-       params[0].pipis_rcvd = pipis_rcvd;
-       params[1].pipis_rcvd = pipis_rcvd;
-
-       /* Start halter vCPU thread and wait for it to execute first HLT. */
-       r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
-       TEST_ASSERT(r == 0,
-                   "pthread_create halter failed errno=%d", errno);
-       fprintf(stderr, "Halter vCPU thread started\n");
-
-       wait_secs = 0;
-       while ((wait_secs < max_halter_wait) && !data->hlt_count) {
-               sleep(1);
-               wait_secs++;
-       }
-
-       TEST_ASSERT(data->hlt_count,
-                   "Halter vCPU did not execute first HLT within %d seconds",
-                   max_halter_wait);
-
-       fprintf(stderr,
-               "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
-               data->halter_apic_id, wait_secs);
-
-       r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
-       TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
-
-       fprintf(stderr,
-               "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
-               run_secs);
-
-       if (!migrate)
-               sleep(run_secs);
-       else
-               do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
-
-       /*
-        * Cancel threads and wait for them to stop.
-        */
-       cancel_join_vcpu_thread(threads[0], params[0].vcpu);
-       cancel_join_vcpu_thread(threads[1], params[1].vcpu);
-
-       fprintf(stderr,
-               "Test successful after running for %d seconds.\n"
-               "Sending vCPU sent %lu IPIs to halting vCPU\n"
-               "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
-               "Halter APIC ID=%#x\n"
-               "Sender ICR value=%#x ICR2 value=%#x\n"
-               "Halter TPR=%#x PPR=%#x LVR=%#x\n"
-               "Migrations attempted: %lu\n"
-               "Migrations completed: %lu\n",
-               run_secs, data->ipis_sent,
-               data->hlt_count, data->wake_count, *pipis_rcvd,
-               data->halter_apic_id,
-               data->icr, data->icr2,
-               data->halter_tpr, data->halter_ppr, data->halter_lvr,
-               data->migrations_attempted, data->migrations_completed);
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
deleted file mode 100644 (file)
index 88bcca1..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "apic.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-struct xapic_vcpu {
-       struct kvm_vcpu *vcpu;
-       bool is_x2apic;
-       bool has_xavic_errata;
-};
-
-static void xapic_guest_code(void)
-{
-       asm volatile("cli");
-
-       xapic_enable();
-
-       while (1) {
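-               /*
-                * The host stuffs the ICR value to write into the (otherwise
-                * unused) IRR via KVM_SET_LAPIC, see ____test_icr().  Echo it
-                * into ICR2/ICR and report it back to the host.
-                */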
-               uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
-                              (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
-
-               xapic_write_reg(APIC_ICR2, val >> 32);
-               xapic_write_reg(APIC_ICR, val);
-               GUEST_SYNC(val);
-       }
-}
-
-#define X2APIC_RSVD_BITS_MASK  (GENMASK_ULL(31, 20) | \
-                               GENMASK_ULL(17, 16) | \
-                               GENMASK_ULL(13, 13))
-
-static void x2apic_guest_code(void)
-{
-       asm volatile("cli");
-
-       x2apic_enable();
-
-       do {
-               uint64_t val = x2apic_read_reg(APIC_IRR) |
-                              x2apic_read_reg(APIC_IRR + 0x10) << 32;
-
-               if (val & X2APIC_RSVD_BITS_MASK) {
-                       x2apic_write_reg_fault(APIC_ICR, val);
-               } else {
-                       x2apic_write_reg(APIC_ICR, val);
-                       GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val);
-               }
-               GUEST_SYNC(val);
-       } while (1);
-}
-
-static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
-{
-       struct kvm_vcpu *vcpu = x->vcpu;
-       struct kvm_lapic_state xapic;
-       struct ucall uc;
-       uint64_t icr;
-
-       /*
-        * Tell the guest what ICR value to write.  Use the IRR to pass info,
-        * as all bits are valid and should not be modified by KVM (ignoring
-        * the fact that vectors 0-15 are technically illegal).
-        */
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-       *((u32 *)&xapic.regs[APIC_IRR]) = val;
-       *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
-       vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
-
-       vcpu_run(vcpu);
-       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
-       TEST_ASSERT_EQ(uc.args[1], val);
-
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-       icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
-             (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
-       if (!x->is_x2apic) {
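-               /*
-                * In xAPIC mode, KVM drops writes to ICR2[23:0] (see the AVIC
-                * errata comment in main()), so only the low 32 bits and the
-                * destination field in ICR2[31:24] survive the round trip.
-                */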
-               if (!x->has_xavic_errata)
-                       val &= (-1u | (0xffull << (32 + 24)));
-       } else if (val & X2APIC_RSVD_BITS_MASK) {
-               return;
-       }
-
-       if (x->has_xavic_errata)
-               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
-       else
-               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
-}
-
-static void __test_icr(struct xapic_vcpu *x, uint64_t val)
-{
-       /*
-        * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
-        * it as _must_ be zero.  Intel simply ignores the bit.  Don't test
-        * the BUSY bit for x2APIC, as there is no single correct behavior.
-        */
-       if (!x->is_x2apic)
-               ____test_icr(x, val | APIC_ICR_BUSY);
-
-       ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
-}
-
-static void test_icr(struct xapic_vcpu *x)
-{
-       struct kvm_vcpu *vcpu = x->vcpu;
-       uint64_t icr, i, j;
-
-       icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
-       for (i = 0; i <= 0xff; i++)
-               __test_icr(x, icr | i);
-
-       icr = APIC_INT_ASSERT | APIC_DM_FIXED;
-       for (i = 0; i <= 0xff; i++)
-               __test_icr(x, icr | i);
-
-       /*
-        * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
-        * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
-        */
-       icr = APIC_INT_ASSERT | 0xff;
-       for (i = 0; i < 0xff; i++) {
-               if (i == vcpu->id)
-                       continue;
-               for (j = 0; j < 8; j++)
-                       __test_icr(x, i << (32 + 24) | icr | (j << 8));
-       }
-
-       /* And again with a shorthand destination for all types of IPIs. */
-       icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
-       for (i = 0; i < 8; i++)
-               __test_icr(x, icr | (i << 8));
-
-       /* And a few garbage values, just make sure it's an IRQ (blocked). */
-       __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
-       __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
-       __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
-}
-
-static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
-{
-       uint32_t apic_id, expected;
-       struct kvm_lapic_state xapic;
-
-       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
-
-       vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
-
-       expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
-       apic_id = *((u32 *)&xapic.regs[APIC_ID]);
-
-       TEST_ASSERT(apic_id == expected,
-                   "APIC_ID not set back to %s format; wanted = %x, got = %x",
-                   (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
-                   expected, apic_id);
-}
-
-/*
- * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
- * stuffs MSR_IA32_APICBASE.  Setting the APIC_ID when x2APIC is enabled and
- * when the APIC transitions from DISABLED to ENABLED is architectural behavior
- * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI, as
- * attempting to transition from x2APIC to xAPIC without disabling the APIC is
- * architecturally disallowed.
- */
-static void test_apic_id(void)
-{
-       const uint32_t NR_VCPUS = 3;
-       struct kvm_vcpu *vcpus[NR_VCPUS];
-       uint64_t apic_base;
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
-       vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
-
-       for (i = 0; i < NR_VCPUS; i++) {
-               apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
-
-               TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
-                           "APIC not in ENABLED state at vCPU RESET");
-               TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
-                           "APIC not in xAPIC mode at vCPU RESET");
-
-               __test_apic_id(vcpus[i], apic_base);
-               __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
-               __test_apic_id(vcpus[i], apic_base);
-       }
-
-       kvm_vm_free(vm);
-}
-
-static void test_x2apic_id(void)
-{
-       struct kvm_lapic_state lapic = {};
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       int i;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-       vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-
-       /*
-        * Try stuffing a modified x2APIC ID; KVM should ignore the value and
-        * always return the vCPU's default/readonly x2APIC ID.
-        */
-       for (i = 0; i <= 0xff; i++) {
-               *(u32 *)(lapic.regs + APIC_ID) = i << 24;
-               *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
-               vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
-
-               vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
-               TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
-                           "x2APIC ID should be fully readonly");
-       }
-
-       kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-       struct xapic_vcpu x = {
-               .vcpu = NULL,
-               .is_x2apic = true,
-       };
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
-       test_icr(&x);
-       kvm_vm_free(vm);
-
-       /*
-        * Use a second VM for the xAPIC test so that x2APIC can be hidden from
-        * the guest in order to test AVIC.  KVM disallows changing CPUID after
-        * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
-        */
-       vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
-       x.is_x2apic = false;
-
-       /*
-        * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit),
-        * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel
- * drop writes, AMD does not).  Account for the errata when checking
-        * that KVM reads back what was written.
-        */
-       x.has_xavic_errata = host_cpu_is_amd &&
-                            get_kvm_amd_param_bool("avic");
-
-       vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
-
-       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-       test_icr(&x);
-       kvm_vm_free(vm);
-
-       test_apic_id();
-       test_x2apic_id();
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
deleted file mode 100644 (file)
index c8a5c5e..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * XCR0 cpuid test
- *
- * Copyright (C) 2022, Google LLC.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-/*
- * Assert that architectural dependency rules are satisfied, e.g. that AVX is
- * supported if and only if SSE is supported.
- */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
-do {                                                                                   \
-       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
-                                                                                       \
-       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
-                      __supported == ((xfeatures) | (dependencies)),                   \
-                      "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx",  \
-                      __supported, (xfeatures), (dependencies));                       \
-} while (0)
-
-/*
- * Assert that KVM reports a sane, usable as-is XCR0.  Architecturally, a CPU
- * isn't strictly required to _support_ all XFeatures related to a feature, but
- * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
- * disabled coherently.  E.g. a CPU can technically enumerate support for
- * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
- * XTILE_DATA will #GP.
- */
-#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures)         \
-do {                                                                   \
-       uint64_t __supported = (supported_xcr0) & (xfeatures);          \
-                                                                       \
-       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
-                      "supported = 0x%lx, xfeatures = 0x%llx",         \
-                      __supported, (xfeatures));                       \
-} while (0)
-
-static void guest_code(void)
-{
-       uint64_t initial_xcr0;
-       uint64_t supported_xcr0;
-       int i, vector;
-
-       set_cr4(get_cr4() | X86_CR4_OSXSAVE);
-
-       initial_xcr0 = xgetbv(0);
-       supported_xcr0 = this_cpu_supported_xcr0();
-
-       GUEST_ASSERT(initial_xcr0 == supported_xcr0);
-
-       /* Check AVX */
-       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
-                                    XFEATURE_MASK_YMM,
-                                    XFEATURE_MASK_SSE);
-
-       /* Check MPX */
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-
-       /* Check AVX-512 */
-       ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
-                                    XFEATURE_MASK_AVX512,
-                                    XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_AVX512);
-
-       /* Check AMX */
-       ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
-                                   XFEATURE_MASK_XTILE);
-
-       vector = xsetbv_safe(0, XFEATURE_MASK_FP);
-       __GUEST_ASSERT(!vector,
-                      "Expected success on XSETBV(FP), got vector '0x%x'",
-                      vector);
-
-       vector = xsetbv_safe(0, supported_xcr0);
-       __GUEST_ASSERT(!vector,
-                      "Expected success on XSETBV(0x%lx), got vector '0x%x'",
-                      supported_xcr0, vector);
-
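-       /* Setting any unsupported xfeature bit in XCR0 should #GP. */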
-       for (i = 0; i < 64; i++) {
-               if (supported_xcr0 & BIT_ULL(i))
-                       continue;
-
-               vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
-               __GUEST_ASSERT(vector == GP_VECTOR,
-                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
-                              BIT_ULL(i), supported_xcr0, vector);
-       }
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
-       struct kvm_vm *vm;
-       struct ucall uc;
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       run = vcpu->run;
-
-       while (1) {
-               vcpu_run(vcpu);
-
-               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Unexpected exit reason: %u (%s)",
-                           run->exit_reason,
-                           exit_reason_str(run->exit_reason));
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
-               }
-       }
-
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
deleted file mode 100644 (file)
index a59b3c7..0000000
+++ /dev/null
@@ -1,1161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-#include <stdint.h>
-#include <time.h>
-#include <sched.h>
-#include <signal.h>
-#include <pthread.h>
-
-#include <sys/eventfd.h>
-
-#define SHINFO_REGION_GVA      0xc0000000ULL
-#define SHINFO_REGION_GPA      0xc0000000ULL
-#define SHINFO_REGION_SLOT     10
-
-#define DUMMY_REGION_GPA       (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT      11
-
-#define DUMMY_REGION_GPA_2     (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
-#define DUMMY_REGION_SLOT_2    12
-
-#define SHINFO_ADDR    (SHINFO_REGION_GPA)
-#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
-#define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
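-/*
- * Place the runstate area 15 bytes shy of the end of the second page so that
- * the vcpu_runstate_info structure straddles a page boundary.
- */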
-#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define SHINFO_VADDR   (SHINFO_REGION_GVA)
-#define VCPU_INFO_VADDR        (SHINFO_REGION_GVA + 0x40)
-#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
-
-#define EVTCHN_VECTOR  0x10
-
-#define EVTCHN_TEST1 15
-#define EVTCHN_TEST2 66
-#define EVTCHN_TIMER 13
-
-enum {
-       TEST_INJECT_VECTOR = 0,
-       TEST_RUNSTATE_runnable,
-       TEST_RUNSTATE_blocked,
-       TEST_RUNSTATE_offline,
-       TEST_RUNSTATE_ADJUST,
-       TEST_RUNSTATE_DATA,
-       TEST_STEAL_TIME,
-       TEST_EVTCHN_MASKED,
-       TEST_EVTCHN_UNMASKED,
-       TEST_EVTCHN_SLOWPATH,
-       TEST_EVTCHN_SEND_IOCTL,
-       TEST_EVTCHN_HCALL,
-       TEST_EVTCHN_HCALL_SLOWPATH,
-       TEST_EVTCHN_HCALL_EVENTFD,
-       TEST_TIMER_SETUP,
-       TEST_TIMER_WAIT,
-       TEST_TIMER_RESTORE,
-       TEST_POLL_READY,
-       TEST_POLL_TIMEOUT,
-       TEST_POLL_MASKED,
-       TEST_POLL_WAKE,
-       SET_VCPU_INFO,
-       TEST_TIMER_PAST,
-       TEST_LOCKING_SEND_RACE,
-       TEST_LOCKING_POLL_RACE,
-       TEST_LOCKING_POLL_TIMEOUT,
-       TEST_DONE,
-
-       TEST_GUEST_SAW_IRQ,
-};
-
-#define XEN_HYPERCALL_MSR      0x40000000
-
-#define MIN_STEAL_TIME         50000
-
-#define SHINFO_RACE_TIMEOUT    2       /* seconds */
-
-#define __HYPERVISOR_set_timer_op      15
-#define __HYPERVISOR_sched_op          29
-#define __HYPERVISOR_event_channel_op  32
-
-#define SCHEDOP_poll                   3
-
-#define EVTCHNOP_send                  4
-
-#define EVTCHNSTAT_interdomain         2
-
-struct evtchn_send {
-       u32 port;
-};
-
-struct sched_poll {
-       u32 *ports;
-       unsigned int nr_ports;
-       u64 timeout;
-};
-
-struct pvclock_vcpu_time_info {
-       u32   version;
-       u32   pad0;
-       u64   tsc_timestamp;
-       u64   system_time;
-       u32   tsc_to_system_mul;
-       s8    tsc_shift;
-       u8    flags;
-       u8    pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-       u32   version;
-       u32   sec;
-       u32   nsec;
-} __attribute__((__packed__));
-
-struct vcpu_runstate_info {
-       uint32_t state;
-       uint64_t state_entry_time;
-       uint64_t time[5]; /* Extra field for overrun check */
-};
-
-struct compat_vcpu_runstate_info {
-       uint32_t state;
-       uint64_t state_entry_time;
-       uint64_t time[5];
-} __attribute__((__packed__));
-
-struct arch_vcpu_info {
-       unsigned long cr2;
-       unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
-};
-
-struct vcpu_info {
-       uint8_t evtchn_upcall_pending;
-       uint8_t evtchn_upcall_mask;
-       unsigned long evtchn_pending_sel;
-       struct arch_vcpu_info arch;
-       struct pvclock_vcpu_time_info time;
-}; /* 64 bytes (x86) */
-
-struct shared_info {
-       struct vcpu_info vcpu_info[32];
-       unsigned long evtchn_pending[64];
-       unsigned long evtchn_mask[64];
-       struct pvclock_wall_clock wc;
-       uint32_t wc_sec_hi;
-       /* arch_shared_info here */
-};
-
-#define RUNSTATE_running  0
-#define RUNSTATE_runnable 1
-#define RUNSTATE_blocked  2
-#define RUNSTATE_offline  3
-
-static const char *runstate_names[] = {
-       "running",
-       "runnable",
-       "blocked",
-       "offline"
-};
-
-struct {
-       struct kvm_irq_routing info;
-       struct kvm_irq_routing_entry entries[2];
-} irq_routes;
-
-static volatile bool guest_saw_irq;
-
-static void evtchn_handler(struct ex_regs *regs)
-{
-       struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
-
-       vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
-       vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
-       guest_saw_irq = true;
-
-       GUEST_SYNC(TEST_GUEST_SAW_IRQ);
-}
-
-static void guest_wait_for_irq(void)
-{
-       while (!guest_saw_irq)
-               __asm__ __volatile__ ("rep nop" : : : "memory");
-       guest_saw_irq = false;
-}
-
-static void guest_code(void)
-{
-       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
-       int i;
-
-       __asm__ __volatile__(
-               "sti\n"
-               "nop\n"
-       );
-
-       /* Trigger an interrupt injection */
-       GUEST_SYNC(TEST_INJECT_VECTOR);
-
-       guest_wait_for_irq();
-
-       /* Test having the host set runstates manually */
-       GUEST_SYNC(TEST_RUNSTATE_runnable);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       GUEST_SYNC(TEST_RUNSTATE_blocked);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       GUEST_SYNC(TEST_RUNSTATE_offline);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
-       GUEST_ASSERT(rs->state == 0);
-
-       /* Test runstate time adjust */
-       GUEST_SYNC(TEST_RUNSTATE_ADJUST);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
-
-       /* Test runstate time set */
-       GUEST_SYNC(TEST_RUNSTATE_DATA);
-       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
-       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
-       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
-
-       /* sched_yield() should result in some 'runnable' time */
-       GUEST_SYNC(TEST_STEAL_TIME);
-       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
-
-       /* Attempt to deliver a *masked* interrupt */
-       GUEST_SYNC(TEST_EVTCHN_MASKED);
-
-       /* Wait until we see the bit set */
-       struct shared_info *si = (void *)SHINFO_VADDR;
-       while (!si->evtchn_pending[0])
-               __asm__ __volatile__ ("rep nop" : : : "memory");
-
-       /* Now deliver an *unmasked* interrupt */
-       GUEST_SYNC(TEST_EVTCHN_UNMASKED);
-
-       guest_wait_for_irq();
-
-       /* Change memslots and deliver an interrupt */
-       GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
-
-       guest_wait_for_irq();
-
-       /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
-       GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL);
-
-       /* Our turn. Deliver event channel (to ourselves) with
-        * EVTCHNOP_send hypercall. */
-       struct evtchn_send s = { .port = 127 };
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
-
-       /*
-        * Same again, but this time the host has messed with memslots so it
-        * should take the slow path in kvm_xen_set_evtchn().
-        */
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
-
-       /* Deliver "outbound" event channel to an eventfd which
-        * happens to be one of our own irqfds. */
-       s.port = 197;
-       xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_SETUP);
-
-       /* Set a timer 100ms in the future. */
-       xen_hypercall(__HYPERVISOR_set_timer_op,
-                     rs->state_entry_time + 100000000, NULL);
-
-       GUEST_SYNC(TEST_TIMER_WAIT);
-
-       /* Now wait for the timer */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_RESTORE);
-
-       /* The host has 'restored' the timer. Just wait for it. */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_POLL_READY);
-
-       /* Poll for an event channel port which is already set */
-       u32 ports[1] = { EVTCHN_TIMER };
-       struct sched_poll p = {
-               .ports = ports,
-               .nr_ports = 1,
-               .timeout = 0,
-       };
-
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_TIMEOUT);
-
-       /* Poll for an unset port and wait for the timeout. */
-       p.timeout = 100000000;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_MASKED);
-
-       /* A timer will wake the masked port we're waiting on, while we poll */
-       p.timeout = 0;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       GUEST_SYNC(TEST_POLL_WAKE);
-
-       /* Set the vcpu_info to point at exactly the place it already is to
-        * make sure the attribute is functional. */
-       GUEST_SYNC(SET_VCPU_INFO);
-
-       /* A timer will wake an *unmasked* port, which should wake us with an
-        * actual interrupt while we're polling on a different port. */
-       ports[0]++;
-       p.timeout = 0;
-       xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_TIMER_PAST);
-
-       /* Timer should have fired already */
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_LOCKING_SEND_RACE);
-       /* Racing host ioctls */
-
-       guest_wait_for_irq();
-
-       GUEST_SYNC(TEST_LOCKING_POLL_RACE);
-       /* Racing vmcall against host ioctl */
-
-       ports[0] = 0;
-
-       p = (struct sched_poll) {
-               .ports = ports,
-               .nr_ports = 1,
-               .timeout = 0
-       };
-
-wait_for_timer:
-       /*
-        * Poll for a timer wake event while the worker thread is mucking with
-        * the shared info.  KVM XEN drops timer IRQs if the shared info is
-        * invalid when the timer expires.  Arbitrarily poll 100 times before
-        * giving up and asking the VMM to re-arm the timer.  100 polls should
-        * consume enough time to beat on KVM without taking too long if the
-        * timer IRQ is dropped due to an invalid event channel.
-        */
-       for (i = 0; i < 100 && !guest_saw_irq; i++)
-               __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
-
-       /*
-        * Re-send the timer IRQ if it was (likely) dropped due to the timer
-        * expiring while the event channel was invalid.
-        */
-       if (!guest_saw_irq) {
-               GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
-               goto wait_for_timer;
-       }
-       guest_saw_irq = false;
-
-       GUEST_SYNC(TEST_DONE);
-}
-
-static struct shared_info *shinfo;
-static struct vcpu_info *vinfo;
-static struct kvm_vcpu *vcpu;
-
-static void handle_alrm(int sig)
-{
-       if (vinfo)
-               printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
-       vcpu_dump(stdout, vcpu, 0);
-       TEST_FAIL("IRQ delivery timed out");
-}
-
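-/*
- * Worker thread that repeatedly activates and deactivates the shared_info
- * cache (by GFN, and by HVA when supported) while the guest runs, to race
- * against the vCPU and exercise KVM's handling of an invalid shared_info.
- */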
-static void *juggle_shinfo_state(void *arg)
-{
-       struct kvm_vm *vm = (struct kvm_vm *)arg;
-
-       struct kvm_xen_hvm_attr cache_activate_gfn = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
-       };
-
-       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = KVM_XEN_INVALID_GFN
-       };
-
-       struct kvm_xen_hvm_attr cache_activate_hva = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
-               .u.shared_info.hva = (unsigned long)shinfo
-       };
-
-       struct kvm_xen_hvm_attr cache_deactivate_hva = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.hva = 0
-       };
-
-       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-
-       for (;;) {
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
-               pthread_testcancel();
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
-
-               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
-                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
-                       pthread_testcancel();
-                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
-               }
-       }
-
-       return NULL;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_xen_hvm_attr evt_reset;
-       struct kvm_vm *vm;
-       pthread_t thread;
-       bool verbose;
-       int ret;
-
-       verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
-                              !strncmp(argv[1], "--verbose", 10));
-
-       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
-
-       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
-       bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
-       bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
-       bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
-       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       /* Map a region for the shared_info page */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
-       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
-
-       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
-
-       int zero_fd = open("/dev/zero", O_RDONLY);
-       TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
-
-       struct kvm_xen_hvm_config hvmc = {
-               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
-               .msr = XEN_HYPERCALL_MSR,
-       };
-
-       /* Let the kernel know that we *will* use it for sending all
-        * event channels, which lets it intercept SCHEDOP_poll */
-       if (do_evtchn_tests)
-               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
-
-       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
-       struct kvm_xen_hvm_attr lm = {
-               .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
-               .u.long_mode = 1,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-       if (do_runstate_flag) {
-               struct kvm_xen_hvm_attr ruf = {
-                       .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
-                       .u.runstate_update_flag = 1,
-               };
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
-
-               ruf.u.runstate_update_flag = 0;
-               vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
-               TEST_ASSERT(ruf.u.runstate_update_flag == 1,
-                           "Failed to read back RUNSTATE_UPDATE_FLAG attr");
-       }
-
-       struct kvm_xen_hvm_attr ha = {};
-
-       if (has_shinfo_hva) {
-               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
-               ha.u.shared_info.hva = (unsigned long)shinfo;
-       } else {
-               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
-               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
-       }
-
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
-
-       /*
-        * Test what happens when the HVA of the shinfo page is remapped after
-        * the kernel has a reference to it. But make sure we copy the clock
-        * info over since that's only set at setup time, and we test it later.
-        */
-       struct pvclock_wall_clock wc_copy = shinfo->wc;
-       void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
-       TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
-       shinfo->wc = wc_copy;
-
-       struct kvm_xen_vcpu_attr vi = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
-               .u.gpa = VCPU_INFO_ADDR,
-       };
-       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
-
-       struct kvm_xen_vcpu_attr pvclock = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
-               .u.gpa = PVTIME_ADDR,
-       };
-       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
-
-       struct kvm_xen_hvm_attr vec = {
-               .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
-               .u.vector = EVTCHN_VECTOR,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
-
-       vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
-
-       if (do_runstate_tests) {
-               struct kvm_xen_vcpu_attr st = {
-                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
-                       .u.gpa = RUNSTATE_ADDR,
-               };
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
-       }
-
-       int irq_fd[2] = { -1, -1 };
-
-       if (do_eventfd_tests) {
-               irq_fd[0] = eventfd(0, 0);
-               irq_fd[1] = eventfd(0, 0);
-
-               /* Unexpected, but not a KVM failure */
-               if (irq_fd[0] == -1 || irq_fd[1] == -1)
-                       do_evtchn_tests = do_eventfd_tests = false;
-       }
-
-       if (do_eventfd_tests) {
-               irq_routes.info.nr = 2;
-
-               irq_routes.entries[0].gsi = 32;
-               irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-               irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
-               irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
-               irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-               irq_routes.entries[1].gsi = 33;
-               irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-               irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
-               irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
-               irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-               vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
-
-               struct kvm_irqfd ifd = { };
-
-               ifd.fd = irq_fd[0];
-               ifd.gsi = 32;
-               vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-               ifd.fd = irq_fd[1];
-               ifd.gsi = 33;
-               vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-               struct sigaction sa = { };
-               sa.sa_handler = handle_alrm;
-               sigaction(SIGALRM, &sa, NULL);
-       }
-
-       struct kvm_xen_vcpu_attr tmr = {
-               .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
-               .u.timer.port = EVTCHN_TIMER,
-               .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-               .u.timer.expires_ns = 0
-       };
-
-       if (do_evtchn_tests) {
-               struct kvm_xen_hvm_attr inj = {
-                       .type = KVM_XEN_ATTR_TYPE_EVTCHN,
-                       .u.evtchn.send_port = 127,
-                       .u.evtchn.type = EVTCHNSTAT_interdomain,
-                       .u.evtchn.flags = 0,
-                       .u.evtchn.deliver.port.port = EVTCHN_TEST1,
-                       .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
-                       .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-               };
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               /* Test migration to a different vCPU */
-               inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
-               inj.u.evtchn.deliver.port.vcpu = vcpu->id;
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               inj.u.evtchn.send_port = 197;
-               inj.u.evtchn.deliver.eventfd.port = 0;
-               inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
-               inj.u.evtchn.flags = 0;
-               vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
-
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-       }
-       vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
-       vinfo->evtchn_upcall_pending = 0;
-
-       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
-       rs->state = 0x5a;
-
-       bool evtchn_irq_expected = false;
-
-       for (;;) {
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC: {
-                       struct kvm_xen_vcpu_attr rst;
-                       long rundelay;
-
-                       if (do_runstate_tests)
-                               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-                                           rs->time[1] + rs->time[2] + rs->time[3],
-                                           "runstate times don't add up");
-
-                       switch (uc.args[1]) {
-                       case TEST_INJECT_VECTOR:
-                               if (verbose)
-                                       printf("Delivering evtchn upcall\n");
-                               evtchn_irq_expected = true;
-                               vinfo->evtchn_upcall_pending = 1;
-                               break;
-
-                       case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
-                               TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
-                               if (!do_runstate_tests)
-                                       goto done;
-                               if (verbose)
-                                       printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
-                               rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
-                                       TEST_RUNSTATE_runnable;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_RUNSTATE_ADJUST:
-                               if (verbose)
-                                       printf("Testing RUNSTATE_ADJUST\n");
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
-                               memset(&rst.u, 0, sizeof(rst.u));
-                               rst.u.runstate.state = (uint64_t)-1;
-                               rst.u.runstate.time_blocked =
-                                       0x5a - rs->time[RUNSTATE_blocked];
-                               rst.u.runstate.time_offline =
-                                       0x6b6b - rs->time[RUNSTATE_offline];
-                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
-                                       rst.u.runstate.time_offline;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_RUNSTATE_DATA:
-                               if (verbose)
-                                       printf("Testing RUNSTATE_DATA\n");
-                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
-                               memset(&rst.u, 0, sizeof(rst.u));
-                               rst.u.runstate.state = RUNSTATE_running;
-                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
-                               rst.u.runstate.time_blocked = 0x6b6b;
-                               rst.u.runstate.time_offline = 0x5a;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
-                               break;
-
-                       case TEST_STEAL_TIME:
-                               if (verbose)
-                                       printf("Testing steal time\n");
-                               /* Yield until scheduler delay exceeds target */
-                               rundelay = get_run_delay() + MIN_STEAL_TIME;
-                               do {
-                                       sched_yield();
-                               } while (get_run_delay() < rundelay);
-                               break;
-
-                       case TEST_EVTCHN_MASKED:
-                               if (!do_eventfd_tests)
-                                       goto done;
-                               if (verbose)
-                                       printf("Testing masked event channel\n");
-                               shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
-                               eventfd_write(irq_fd[0], 1UL);
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_UNMASKED:
-                               if (verbose)
-                                       printf("Testing unmasked event channel\n");
-                               /* Unmask that, but deliver the other one */
-                               shinfo->evtchn_pending[0] = 0;
-                               shinfo->evtchn_mask[0] = 0;
-                               eventfd_write(irq_fd[1], 1UL);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_SLOWPATH:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-                               if (verbose)
-                                       printf("Testing event channel after memslot change\n");
-                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                                           DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
-                               eventfd_write(irq_fd[0], 1UL);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_SEND_IOCTL:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               if (!do_evtchn_tests)
-                                       goto done;
-
-                               shinfo->evtchn_pending[0] = 0;
-                               if (verbose)
-                                       printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
-
-                               struct kvm_irq_routing_xen_evtchn e;
-                               e.port = EVTCHN_TEST2;
-                               e.vcpu = vcpu->id;
-                               e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
-
-                               vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send direct to evtchn\n");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL_SLOWPATH:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
-                               vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                                           DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_EVTCHN_HCALL_EVENTFD:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest EVTCHNOP_send to eventfd\n");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_TIMER_SETUP:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[1] = 0;
-
-                               if (verbose)
-                                       printf("Testing guest oneshot timer\n");
-                               break;
-
-                       case TEST_TIMER_WAIT:
-                               memset(&tmr, 0, sizeof(tmr));
-                               tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
-                                           "Timer port not returned");
-                               TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
-                                           "Timer priority not returned");
-                               TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
-                                           "Timer expiry not returned");
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_TIMER_RESTORE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               shinfo->evtchn_pending[0] = 0;
-
-                               if (verbose)
-                                       printf("Testing restored oneshot timer\n");
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               evtchn_irq_expected = true;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_READY:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll with already pending event\n");
-                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_TIMEOUT:
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll timeout\n");
-                               shinfo->evtchn_pending[0] = 0;
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_MASKED:
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll wake on masked event\n");
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               alarm(1);
-                               break;
-
-                       case TEST_POLL_WAKE:
-                               shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
-                               if (verbose)
-                                       printf("Testing SCHEDOP_poll wake on unmasked event\n");
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-
-                               /* Read it back and check the pending time is reported correctly */
-                               tmr.u.timer.expires_ns = 0;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
-                                           "Timer not reported pending");
-                               alarm(1);
-                               break;
-
-                       case SET_VCPU_INFO:
-                               if (has_shinfo_hva) {
-                                       struct kvm_xen_vcpu_attr vih = {
-                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
-                                               .u.hva = (unsigned long)vinfo
-                                       };
-                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
-                               }
-                               break;
-
-                       case TEST_TIMER_PAST:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               /* Read timer and check it is no longer pending */
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-                               TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
-
-                               shinfo->evtchn_pending[0] = 0;
-                               if (verbose)
-                                       printf("Testing timer in the past\n");
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               alarm(1);
-                               break;
-
-                       case TEST_LOCKING_SEND_RACE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-                               alarm(0);
-
-                               if (verbose)
-                                       printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
-
-                               ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
-                               TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
-
-                               struct kvm_irq_routing_xen_evtchn uxe = {
-                                       .port = 1,
-                                       .vcpu = vcpu->id,
-                                       .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
-                               };
-
-                               evtchn_irq_expected = true;
-                               for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
-                                       __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
-                               break;
-
-                       case TEST_LOCKING_POLL_RACE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               if (verbose)
-                                       printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
-
-                               shinfo->evtchn_pending[0] = 1;
-
-                               evtchn_irq_expected = true;
-                               tmr.u.timer.expires_ns = rs->state_entry_time +
-                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               break;
-
-                       case TEST_LOCKING_POLL_TIMEOUT:
-                               /*
-                                * Optional and possibly repeated sync point.
-                                * Injecting the timer IRQ may fail if the
-                                * shinfo is invalid when the timer expires.
-                                * If the timer has expired but the IRQ hasn't
-                                * been delivered, rearm the timer and retry.
-                                */
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
-
-                               /* Resume the guest if the timer is still pending. */
-                               if (tmr.u.timer.expires_ns)
-                                       break;
-
-                               /* All done if the IRQ was delivered. */
-                               if (!evtchn_irq_expected)
-                                       break;
-
-                               tmr.u.timer.expires_ns = rs->state_entry_time +
-                                                        SHINFO_RACE_TIMEOUT * 1000000000ULL;
-                               vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
-                               break;
-                       case TEST_DONE:
-                               TEST_ASSERT(!evtchn_irq_expected,
-                                           "Expected event channel IRQ but it didn't happen");
-
-                               ret = pthread_cancel(thread);
-                               TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
-
-                               ret = pthread_join(thread, 0);
-                               TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
-                               goto done;
-
-                       case TEST_GUEST_SAW_IRQ:
-                               TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
-                               evtchn_irq_expected = false;
-                               break;
-                       }
-                       break;
-               }
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-
- done:
-       evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
-       evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
-       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
-
-       alarm(0);
-
-       /*
-        * Just a *really* basic check that things are being put in the
-        * right place. The actual calculations are much the same for
-        * Xen as they are for the KVM variants, so no need to check.
-        */
-       struct pvclock_wall_clock *wc;
-       struct pvclock_vcpu_time_info *ti, *ti2;
-       struct kvm_clock_data kcdata;
-       long long delta;
-
-       wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
-       ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
-       ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
-
-       if (verbose) {
-               printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
-               printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
-                      ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
-                      ti->tsc_shift, ti->flags);
-               printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
-                      ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
-                      ti2->tsc_shift, ti2->flags);
-       }
-
-       TEST_ASSERT(wc->version && !(wc->version & 1),
-                   "Bad wallclock version %x", wc->version);
-
-       vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
-
-       if (kcdata.flags & KVM_CLOCK_REALTIME) {
-               if (verbose) {
-                       printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
-                              kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
-                       printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
-                              kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
-               }
-
-               delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
-
-               /*
-                * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
-                * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
-                * delta as testing clock accuracy is not the goal here. The test just needs to
-                * check that the value in shinfo is somewhat sane.
-                */
-               TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
-                           "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%09lld",
-                           wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
-                           (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
-       } else {
-               pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
-       }
-
-       TEST_ASSERT(ti->version && !(ti->version & 1),
-                   "Bad time_info version %x", ti->version);
-       TEST_ASSERT(ti2->version && !(ti2->version & 1),
-                   "Bad time_info version %x", ti2->version);
-
-       if (do_runstate_tests) {
-               /*
-                * Fetch runstate and check sanity. Strictly speaking in the
-                * general case we might not expect the numbers to be identical
-                * but in this case we know we aren't running the vCPU any more.
-                */
-               struct kvm_xen_vcpu_attr rst = {
-                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
-               };
-               vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
-
-               if (verbose) {
-                       printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
-                              rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
-                              rs->state, rs->state_entry_time);
-                       for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
-                               printf("State %s: %" PRIu64 " ns\n",
-                                      runstate_names[i], rs->time[i]);
-                       }
-               }
-
-               /*
-                * Exercise runstate info at all points across the page boundary, in
-                * 32-bit and 64-bit mode. In particular, test the case where it is
-                * configured in 32-bit mode and then switched to 64-bit mode while
-                * active, which takes it onto the second page.
-                */
-               unsigned long runstate_addr;
-               struct compat_vcpu_runstate_info *crs;
-               for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
-                    runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
-
-                       rs = addr_gpa2hva(vm, runstate_addr);
-                       crs = (void *)rs;
-
-                       memset(rs, 0xa5, sizeof(*rs));
-
-                       /* Set to compatibility mode */
-                       lm.u.long_mode = 0;
-                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-                       /* Set runstate to new address (kernel will write it) */
-                       struct kvm_xen_vcpu_attr st = {
-                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
-                               .u.gpa = runstate_addr,
-                       };
-                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
-
-                       if (verbose)
-                               printf("Compatibility runstate at %08lx\n", runstate_addr);
-
-                       TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
-                       TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
-                                   "State entry time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
-                                   "Running time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
-                                   "Runnable time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
-                                   "Blocked time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
-                                   "Offline time mismatch");
-                       TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
-                                   "Structure overrun");
-                       TEST_ASSERT(crs->state_entry_time == crs->time[0] +
-                                   crs->time[1] + crs->time[2] + crs->time[3],
-                                   "runstate times don't add up");
-
-
-                       /* Now switch to 64-bit mode */
-                       lm.u.long_mode = 1;
-                       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
-
-                       memset(rs, 0xa5, sizeof(*rs));
-
-                       /* Don't change the address, just trigger a write */
-                       struct kvm_xen_vcpu_attr adj = {
-                               .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
-                               .u.runstate.state = (uint64_t)-1
-                       };
-                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
-
-                       if (verbose)
-                               printf("64-bit runstate at %08lx\n", runstate_addr);
-
-                       TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
-                       TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
-                                   "State entry time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
-                                   "Running time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
-                                   "Runnable time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
-                                   "Blocked time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
-                                   "Offline time mismatch");
-                       TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
-                                   "Structure overrun");
-
-                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-                                   rs->time[1] + rs->time[2] + rs->time[3],
-                                   "runstate times don't add up");
-               }
-       }
-
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
deleted file mode 100644 (file)
index 2585087..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * xen_vmcall_test
- *
- * Copyright © 2020 Amazon.com, Inc. or its affiliates.
- *
- * Userspace hypercall testing
- */
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "hyperv.h"
-
-#define HCALL_REGION_GPA       0xc0000000ULL
-#define HCALL_REGION_SLOT      10
-
-#define INPUTVALUE 17
-#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
-#define RETVALUE 0xcafef00dfbfbffffUL
-
-#define XEN_HYPERCALL_MSR      0x40000200
-#define HV_GUEST_OS_ID_MSR     0x40000000
-#define HV_HYPERCALL_MSR       0x40000001
-
-#define HVCALL_SIGNAL_EVENT            0x005d
-#define HV_STATUS_INVALID_ALIGNMENT    4
-
-static void guest_code(void)
-{
-       unsigned long rax = INPUTVALUE;
-       unsigned long rdi = ARGVALUE(1);
-       unsigned long rsi = ARGVALUE(2);
-       unsigned long rdx = ARGVALUE(3);
-       unsigned long rcx;
-       register unsigned long r10 __asm__("r10") = ARGVALUE(4);
-       register unsigned long r8 __asm__("r8") = ARGVALUE(5);
-       register unsigned long r9 __asm__("r9") = ARGVALUE(6);
-
-       /* First a direct invocation of 'vmcall' */
-       __asm__ __volatile__("vmcall" :
-                            "=a"(rax) :
-                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
-                            "r"(r10), "r"(r8), "r"(r9));
-       GUEST_ASSERT(rax == RETVALUE);
-
-       /* Fill in the Xen hypercall page */
-       __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
-                            "a" (HCALL_REGION_GPA & 0xffffffff),
-                            "d" (HCALL_REGION_GPA >> 32));
-
-       /* Set Hyper-V Guest OS ID */
-       __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
-                            "a" (0x5a), "d" (0));
-
-       /* Hyper-V hypercall page */
-       u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
-       __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
-                            "a" (msrval & 0xffffffff),
-                            "d" (msrval >> 32));
-
-       /* Invoke a Xen hypercall */
-       __asm__ __volatile__("call *%1" : "=a"(rax) :
-                            "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
-                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
-                            "r"(r10), "r"(r8), "r"(r9));
-       GUEST_ASSERT(rax == RETVALUE);
-
-       /* Invoke a Hyper-V hypercall */
-       rax = 0;
-       rcx = HVCALL_SIGNAL_EVENT;      /* code */
-       rdx = 0x5a5a5a5a;               /* ingpa (badly aligned) */
-       __asm__ __volatile__("call *%1" : "=a"(rax) :
-                            "r"(HCALL_REGION_GPA + PAGE_SIZE),
-                            "a"(rax), "c"(rcx), "d"(rdx),
-                            "r"(r8));
-       GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
-
-       GUEST_DONE();
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned int xen_caps;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
-       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-       vcpu_set_hv_cpuid(vcpu);
-
-       struct kvm_xen_hvm_config hvmc = {
-               .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
-               .msr = XEN_HYPERCALL_MSR,
-       };
-       vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
-
-       /* Map a region for the hypercall pages */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
-       virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
-
-       for (;;) {
-               volatile struct kvm_run *run = vcpu->run;
-               struct ucall uc;
-
-               vcpu_run(vcpu);
-
-               if (run->exit_reason == KVM_EXIT_XEN) {
-                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
-                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
-                       run->xen.u.hcall.result = RETVALUE;
-                       continue;
-               }
-
-               TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-               switch (get_ucall(vcpu, &uc)) {
-               case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT(uc);
-                       /* NOT REACHED */
-               case UCALL_SYNC:
-                       break;
-               case UCALL_DONE:
-                       goto done;
-               default:
-                       TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
-               }
-       }
-done:
-       kvm_vm_free(vm);
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
deleted file mode 100644 (file)
index f331a4e..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2019, Google LLC.
- *
- * Tests for the IA32_XSS MSR.
- */
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define MSR_BITS      64
-
-int main(int argc, char *argv[])
-{
-       bool xss_in_msr_list;
-       struct kvm_vm *vm;
-       struct kvm_vcpu *vcpu;
-       uint64_t xss_val;
-       int i, r;
-
-       /* Create VM */
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
-       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
-
-       xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
-       TEST_ASSERT(xss_val == 0,
-                   "MSR_IA32_XSS should be initialized to zero");
-
-       vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
-
-       /*
-        * At present, KVM only supports a guest IA32_XSS value of 0. Verify
-        * that trying to set the guest IA32_XSS to an unsupported value fails.
-        * Also, in the future when a non-zero value succeeds check that
-        * IA32_XSS is in the list of MSRs to save/restore.
-        */
-       xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
-       for (i = 0; i < MSR_BITS; ++i) {
-               r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
-
-               /*
-                * Setting a list of MSRs returns the entry that "faulted", or
-                * the last entry +1 if all MSRs were successfully written.
-                */
-               TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
-               TEST_ASSERT(r != 1 || xss_in_msr_list,
-                           "IA32_XSS was able to be set, but was not in save/restore list");
-       }
-
-       kvm_vm_free(vm);
-}